Parsing URIs

{ Title: Parsing URIs

  Question: Have you ever needed to break apart a URI to get its elements?

  Answer:
  NOTE: If you have an Internet library then you already have URI parsing
  functions. However, this may serve as an alternative, a check on how the
  parsing algorithm works, a way to spend an article credit, a way to flame
  somebody else's coding, etc.

    * On Indy (Internet Direct), for URI parsing check the TIdURI class in
      the IdURI unit.
    * On TurboPower Internet Professional, see the IpParseURL function in
      the IpMisc unit.
    * Don't know about ICS, though.

  A URI is the way that an internet address presents itself; most protocols
  follow a URI. A URI has the following syntax:

    [PROTOCOL + ://]HOST[:PORT][SUBDIRS][DOCUMENT][#+BOOKMARK]

  Now here is the way I came up with for URI parsing; the code is commented. }

{ Returns True when ACharacter is one of the decimal digits '0'..'9'. }
function IsNumber(ACharacter: Char): Boolean;
begin
  Result := Pos(ACharacter, '0123456789') > 0;
end;

{ Splits a query string on '&' and stores each piece in Dest.
  EG. for 'p=britney+spears&lang=en' Dest[0] = 'p=britney+spears' and
  Dest[1] = 'lang=en'.
  To access params and values check TStringList.Names and TStringList.Values.

  Dest is cleared first. When Dest is nil nothing happens. Empty pieces are
  kept, so an empty Params yields exactly one empty entry. }
procedure SplitParams(const Params: String; Dest: TStrings);
var
  p: Integer;
  Tmp: String;
begin
  if not Assigned(Dest) then
    Exit;
  Dest.Clear;
  Tmp := Params;
  p := Pos('&', Tmp);
  while p > 0 do
  begin
    Dest.Add(Copy(Tmp, 1, p - 1));
    // drop the piece just stored together with its '&' separator
    Tmp := Copy(Tmp, p + 1, Length(Tmp) - p);
    p := Pos('&', Tmp);
  end;
  // whatever is left after the last '&' is the final parameter
  Dest.Add(Tmp);
end;

{ Given a URL, checks for a query.
  QueryPos receives the 1-based position of the first '?' (0 when there is
  none). The text after the '?', up to a following '#' or the end of the URL,
  is split into Params by SplitParams.
  Params may be nil when only the position is wanted. When the URL has no
  query an assigned Params is cleared, so no stale entries survive. }
procedure ProcessQuery(const URL: String; Params: TStrings;
  var QueryPos: Integer);
var
  Query: String;
  HashIdx: Integer;
begin
  // anything after the '?' are params, so send them to SplitParams
  QueryPos := Pos('?', URL);
  if QueryPos = 0 then
  begin
    if Assigned(Params) then
      Params.Clear;
    Exit;
  end;
  Query := Copy(URL, QueryPos + 1, Length(URL) - QueryPos);
  // a bookmark that follows the query is not part of the last parameter
  HashIdx := Pos('#', Query);
  if HashIdx > 0 then
    Query := Copy(Query, 1, HashIdx - 1);
  SplitParams(Query, Params);
end;

{ Private helper: index of the last character that precedes the query ('?')
  or the bookmark ('#'), whichever comes first. Returns Length(URL) when the
  URL has neither, and 0 when the URL starts with one of them or is empty. }
function UriPathEnd(const URL: String): Integer;
var
  MarkPos: Integer;
begin
  Result := Length(URL);
  MarkPos := Pos('?', URL);
  if (MarkPos > 0) and (MarkPos - 1 < Result) then
    Result := MarkPos - 1;
  MarkPos := Pos('#', URL);
  if (MarkPos > 0) and (MarkPos - 1 < Result) then
    Result := MarkPos - 1;
end;

{ Private helper: index of the first character after the 'PROTOCOL://'
  prefix, or 1 when the URL has no such prefix. A '://' that shows up later
  (inside the path, the query or the bookmark) is not taken for a protocol
  separator. }
function UriAuthorityStart(const URL: String): Integer;
var
  SchemePos: Integer;
begin
  Result := 1;
  SchemePos := Pos('://', URL);
  // a real separator holds the very first '/' of the URL and lies entirely
  // before the query/bookmark
  if (SchemePos > 0) and (Pos('/', URL) = SchemePos + 1) and
    (SchemePos + 2 <= UriPathEnd(URL)) then
    Result := SchemePos + 3;
end;

{ Given a URL, gets the document name. Query and bookmark are never part of
  the name. There are some special cases to this:
  1) if the URL is of the form 'http://www.host.com' or
     'http://www.host.com/dir/' there is no document (the server may still
     return one when queried), so '' is returned;
  2) if the URL is of the form 'http://www.host.com/index.php?p=1' or
     'http://www.host.com/index.php#top' then the document is 'index.php'.

  DocPos always receives the 1-based position where the document starts, or
  would start when there is none (just past the last '/' of the path, or
  just past the host when the URL has no path at all). }
function QueryDocument(const URL: String; var DocPos: Integer): String;
var
  FirstIdx, LastIdx, i: Integer;
begin
  Result := '';
  LastIdx := UriPathEnd(URL);
  FirstIdx := UriAuthorityStart(URL);
  // walk back to the last '/' of the path; stopping at FirstIdx keeps the
  // scan inside the string and away from the slashes of '://'
  i := LastIdx;
  while (i >= FirstIdx) and (URL[i] <> '/') do
    Dec(i);
  if i < FirstIdx then
  begin
    // host only: no path, hence no document
    DocPos := LastIdx + 1;
    Exit;
  end;
  DocPos := i + 1;
  Result := Copy(URL, DocPos, LastIdx - i);
end;

{ Queries the port number of a URL, if any.
  EG. QueryPort('http://www.host.com:8000/index.php') will return 8000,
  else it will return 0. 0 is also returned when the text after the ':' is
  not a decimal number of at most five digits.
  Only the host part is inspected, so a ':' inside the path, the query or
  the bookmark is not mistaken for a port separator. }
function QueryPort(const URL: String): Integer;
var
  AuthStart, AuthEnd, ColonIdx, i, Code: Integer;
  Digits: String;
begin
  Result := 0;
  AuthStart := UriAuthorityStart(URL);
  // the host[:port] part stops at the first '/', '?' or '#'
  AuthEnd := UriPathEnd(URL);
  i := AuthStart;
  while (i <= AuthEnd) and (URL[i] <> '/') do
    Inc(i);
  AuthEnd := i - 1;
  // the port follows the last ':' of that part
  ColonIdx := AuthEnd;
  while (ColonIdx >= AuthStart) and (URL[ColonIdx] <> ':') do
    Dec(ColonIdx);
  if ColonIdx < AuthStart then
    // no port number, so exit gracefully
    Exit;
  Digits := Copy(URL, ColonIdx + 1, AuthEnd - ColonIdx);
  // the length limit keeps Val well inside the Integer range
  if (Digits = '') or (Length(Digits) > 5) then
    Exit;
  for i := 1 to Length(Digits) do
    if not IsNumber(Digits[i]) then
      Exit;
  Val(Digits, Result, Code);
  if Code <> 0 then
    Result := 0;
end;

{ Queries a bookmark in a document.
  EG. QueryBookmark('http://www.host.com/index.php#notwelcome') will return
  'notwelcome'. Returns '' when the URL has no '#'.
  The bookmark runs from the first '#' up to a following '?' or the end of
  the URL, so a query written after the bookmark is left out. }
function QueryBookmark(const URL: String): String;
var
  HashPos: Integer;
  QuestionPos: Integer;
begin
  Result := '';
  HashPos := Pos('#', URL);
  if HashPos = 0 then
    Exit;
  Result := Copy(URL, HashPos + 1, Length(URL) - HashPos);
  QuestionPos := Pos('?', Result);
  if QuestionPos > 0 then
    Result := Copy(Result, 1, QuestionPos - 1);
end;

{ Breaks a URI apart into its elements.
    Host        - everything that precedes the document: protocol, host,
                  port and sub-directories,
                  EG. 'http://www.host.com:8000/dir/'
    Document    - the document name, '' when there is none
    BookMark    - the text after '#', '' when there is none
    Port        - the port number as text, '0' when there is none
    QueryParams - receives one 'name=value' entry per query parameter;
                  may be nil when the parameters are not wanted }
procedure ParseURI(const URI: String; var Host, Document, BookMark,
  Port: String; QueryParams: TStrings);
var
  QueryPos, DocPos: Integer;
begin
  ProcessQuery(URI, QueryParams, QueryPos);
  BookMark := QueryBookmark(URI);
  Port := IntToStr(QueryPort(URI));
  Document := QueryDocument(URI, DocPos);
  Host := Copy(URI, 1, DocPos - 1);
end;

{ By the way, this algorithm can be extended, expanded, etc., and I am
  working on it. Any suggestions, comments or criticism, drop a comment.

  KNOWN BUGS:
    * According to some sources the Pos function is not as fast as it should
      be and cannot be used for reverse string positioning.
    * Yes, I know this isn't the best algorithm for URI parsing; as of this
      writing I am working on enhancements, code enlargements, etc. }