I need to be able to break a URL down into different segments. Take this path for example:
http://login:password@somehost.somedomain.com:8080/some_path/something_else.html?param1=val&param2=val#nose
\__/   \___/ \______/ \_____________________/ \__/\____________________________/ \___________________/ \__/
 |       |      |                |             |                |                          |            |
Scheme Username Password       Host          Port             Path                       Query       Fragment
This should break down as follows:
Protocol: HTTP
Username: login
Password: password
Host: somehost.somedomain.com
Port: 8080
Path Info: /some_path/something_else.html
Query String: param1=val&param2=val
How can I do this in Delphi? Is there something ready made which can split this up for me? If not, how do I go about parsing all the different possible formats? This is assuming that it might even be a different protocol, such as HTTPS or RTSP.
URL Parsing. The URL parsing functions focus on splitting a URL string into its components, or on combining URL components into a URL string.
A URL consists of five parts: the scheme, subdomain, top-level domain, second-level domain, and subdirectory. Below is an illustration of the different parts of a URL. Let's break down this URL structure below.
Delphi XE2 ships with Indy, which has a TIdURI class for that purpose, e.g.:
uses
  ..., IdURI;

var
  URI: TIdURI;

// Parse the URL once in the constructor, then read the individual parts
// from the object's properties.
URI := TIdURI.Create('http://login:password@somehost.somedomain.com:8080/some_path/something_else.html?param1=val&param2=val');
try
  // Protocol = URI.Protocol
  // Username = URI.Username
  // Password = URI.Password
  // Host     = URI.Host
  // Port     = URI.Port
  // Path     = URI.Path + URI.Document
  //            (TIdURI keeps the directory part in Path and the final
  //             segment, here 'something_else.html', in Document)
  // Query    = URI.Params
finally
  // TIdURI is a plain object, so it must be released explicitly.
  URI.Free;
end;
You can use the WinInet InternetCrackUrl function.
Try this sample:
{$APPTYPE CONSOLE}
uses
Windows,
SysUtils,
WinInet;
{ Splits lpszUrl into its components with WinInet's InternetCrackUrl and
  writes the scheme, host, user name, password, path and extra info
  (everything from the '?' or '#' onwards) to standard output.

  lpszUrl - the URL to parse.

  Raises EOSError (through RaiseLastOSError) when InternetCrackUrl fails,
  for example because the URL is malformed or a component does not fit
  into its buffer. }
procedure ParseURL(const lpszUrl: string);
var
  lpszScheme      : array[0..INTERNET_MAX_SCHEME_LENGTH - 1] of Char;
  lpszHostName    : array[0..INTERNET_MAX_HOST_NAME_LENGTH - 1] of Char;
  lpszUserName    : array[0..INTERNET_MAX_USER_NAME_LENGTH - 1] of Char;
  lpszPassword    : array[0..INTERNET_MAX_PASSWORD_LENGTH - 1] of Char;
  lpszUrlPath     : array[0..INTERNET_MAX_PATH_LENGTH - 1] of Char;
  lpszExtraInfo   : array[0..1024 - 1] of Char;
  lpUrlComponents : TURLComponents;
begin
  // Start from zeroed buffers so every component is a valid empty string
  // even if the URL does not contain it.
  ZeroMemory(@lpszScheme, SizeOf(lpszScheme));
  ZeroMemory(@lpszHostName, SizeOf(lpszHostName));
  ZeroMemory(@lpszUserName, SizeOf(lpszUserName));
  ZeroMemory(@lpszPassword, SizeOf(lpszPassword));
  ZeroMemory(@lpszUrlPath, SizeOf(lpszUrlPath));
  ZeroMemory(@lpszExtraInfo, SizeOf(lpszExtraInfo));
  ZeroMemory(@lpUrlComponents, SizeOf(TURLComponents));

  lpUrlComponents.dwStructSize := SizeOf(TURLComponents);

  // The dw*Length fields are measured in characters, not bytes. SizeOf()
  // would report twice the real capacity when Char is a 2-byte WideChar and
  // allow the API to write past the end of the buffer, so use Length().
  lpUrlComponents.lpszScheme := lpszScheme;
  lpUrlComponents.dwSchemeLength := Length(lpszScheme);
  lpUrlComponents.lpszHostName := lpszHostName;
  lpUrlComponents.dwHostNameLength := Length(lpszHostName);
  lpUrlComponents.lpszUserName := lpszUserName;
  lpUrlComponents.dwUserNameLength := Length(lpszUserName);
  lpUrlComponents.lpszPassword := lpszPassword;
  lpUrlComponents.dwPasswordLength := Length(lpszPassword);
  lpUrlComponents.lpszUrlPath := lpszUrlPath;
  lpUrlComponents.dwUrlPathLength := Length(lpszUrlPath);
  lpUrlComponents.lpszExtraInfo := lpszExtraInfo;
  lpUrlComponents.dwExtraInfoLength := Length(lpszExtraInfo);

  // Do not print half-filled buffers when the call fails; surface the
  // Windows error to the caller instead.
  if not InternetCrackUrl(PChar(lpszUrl), Length(lpszUrl), ICU_DECODE or ICU_ESCAPE, lpUrlComponents) then
    RaiseLastOSError;

  Writeln(Format('Protocol : %s',[lpszScheme]));
  Writeln(Format('Host : %s',[lpszHostName]));
  Writeln(Format('User : %s',[lpszUserName]));
  Writeln(Format('Password : %s',[lpszPassword]));
  Writeln(Format('Path : %s',[lpszUrlPath]));
  Writeln(Format('ExtraInfo: %s',[lpszExtraInfo]));
end;
// Program entry point: parse one sample URL and print its components.
begin
  try
    // Sample URL with credentials, host, path and a two-parameter query.
    ParseURL('http://login:password@somehost.somedomain.com/some_path/something_else.html?param1=val&param2=val');
  except
    // Report any failure as "<exception class>: <message>" on the console.
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
  // Wait for Enter so the console window stays open.
  readln;
end.
This will return
Protocol : http
Host : somehost.somedomain.com
User : login
Password : password
Path : /some_path/something_else.html
ExtraInfo: ?param1=val&param2=val
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With