Is there a method that tests whether two URLs are equal, i.e. point to the same place? I am not talking about two URLs with different domain names pointing to the same IP address, but, for example, two URLs that point to the same .aspx page:
is equal to these:
Notes/assumptions
----UPDATE----
This is a very crude method that tests a URL to see if it matches the current URL. I tried creating a new Uri() with both the local and check URLs, but I don't know whether that works, so I went down the string-checking avenue. The implementation of the SiteMapProvider skips this step if the URL starts with "Http", as this assumes an external URL. Since I have a SaaS framework that will always ensure relative paths (as these can be on different subdomains), it is easier to strip things down.
Any comments on optimization? I guess for a start we could pass in a variable containing the current URL. I'm not sure of the overhead of calling HttpContext.Current.Request.Url.LocalPath many times.
/// <summary>
/// Tests whether <paramref name="url"/> refers to the page that is currently being
/// requested. Query strings and fragments are ignored, a folder path is treated as
/// that folder's <c>Default.aspx</c>, and the comparison is case-insensitive.
/// Assumes URL is a relative aspx page or folder path; an absolute URL is reduced
/// to its path before comparing.
/// </summary>
/// <param name="url">
/// The URL to test, e.g. <c>~/Admin/</c>, <c>/Admin/Default.aspx?id=1</c>.
/// A <c>null</c> value never matches.
/// </param>
/// <returns>
/// <c>true</c> when the normalized <paramref name="url"/> equals the normalized
/// local path of the current request; otherwise <c>false</c>.
/// </returns>
public static bool CurrentURLMatch(string url)
{
    if (url == null)
    {
        return false;
    }

    Uri requestUrl = HttpContext.Current.Request.Url;
    string localURL = requestUrl.LocalPath;

    if (requestUrl.Host == "localhost")
    {
        // On localhost the first path segment is dropped (presumably the virtual
        // directory of the development site - confirm against the hosting setup).
        // When there is no second segment the path is left untouched instead of
        // throwing from Substring(-1).
        int secondSlash = localURL.Length > 1 ? localURL.IndexOf('/', 1) : -1;
        if (secondSlash >= 0)
        {
            localURL = localURL.Substring(secondSlash);
        }
    }

    localURL = EnsureDefaultDocument(TrimQueryAndFragment(localURL));

    string compareURL = TrimSchemeAndHost(TrimQueryAndFragment(url));
    // "~" (application root marker) is removed so "~/page.aspx" becomes "/page.aspx".
    compareURL = compareURL.Replace("~", String.Empty);
    compareURL = EnsureDefaultDocument(compareURL);

    // Neither side is lower-cased any more; the comparison itself ignores case,
    // so "/Admin/Default.aspx" and "/admin/default.aspx" are the same page.
    return String.Equals(localURL, compareURL, StringComparison.OrdinalIgnoreCase);
}

/// <summary>Returns <paramref name="path"/> without its query string and fragment.</summary>
private static string TrimQueryAndFragment(string path)
{
    int cut = path.IndexOfAny(new[] { '?', '#' });
    return cut < 0 ? path : path.Substring(0, cut);
}

/// <summary>
/// Reduces an absolute ("scheme://host/path") or protocol-relative ("//host/path")
/// URL to its path, keeping the leading slash. Other values are returned unchanged.
/// </summary>
private static string TrimSchemeAndHost(string path)
{
    int authorityStart = -1;
    if (path.StartsWith("//", StringComparison.Ordinal))
    {
        authorityStart = 2;
    }
    else
    {
        // Only treat "://" as a scheme separator when nothing that looks like a
        // path precedes it (its first slash must be the first slash of the string).
        int schemeEnd = path.IndexOf("://", StringComparison.Ordinal);
        if (schemeEnd > 0 && path.IndexOf('/') == schemeEnd + 1)
        {
            authorityStart = schemeEnd + 3;
        }
    }

    if (authorityStart < 0)
    {
        return path;
    }

    int pathStart = path.IndexOf('/', authorityStart);
    return pathStart < 0 ? "/" : path.Substring(pathStart);
}

/// <summary>
/// Makes a folder path explicit: a path that does not mention "aspx" gets a trailing
/// slash, and any path ending in a slash gets "Default.aspx" appended.
/// </summary>
private static string EnsureDefaultDocument(string path)
{
    bool namesPage = path.IndexOf("aspx", StringComparison.OrdinalIgnoreCase) >= 0;
    if (!namesPage && !path.EndsWith("/", StringComparison.Ordinal))
    {
        path = String.Concat(path, "/");
    }
    if (path.EndsWith("/", StringComparison.Ordinal))
    {
        path = String.Concat(path, "Default.aspx");
    }
    return path;
}
For the record, here is a translation of http://en.wikipedia.org/wiki/URL%5Fnormalization to C#:
using System;
using System.Web;
namespace UrlNormalizationTest
{
    /// <summary>
    /// Aggressive URL normalization used to decide whether two absolute URLs point to
    /// the same resource. The normal form is lower-cased and percent-decoded, uses
    /// http instead of https, has no default directory index, fragment, duplicate
    /// path slashes, "utm_source" tracking tail, trailing slash or empty query, and
    /// carries a "www." prefix on two-label hosts.
    /// </summary>
    public static class UrlNormalization
    {
        /// <summary>Returns true when both URL strings have the same normal form.</summary>
        /// <param name="url1">First absolute URL.</param>
        /// <param name="url2">Second absolute URL.</param>
        public static bool AreTheSameUrls(this string url1, string url2)
        {
            return string.Equals(url1.NormalizeUrl(), url2.NormalizeUrl(), StringComparison.Ordinal);
        }

        /// <summary>Returns true when both URIs have the same normal form.</summary>
        /// <param name="uri1">First absolute URI.</param>
        /// <param name="uri2">Second absolute URI.</param>
        public static bool AreTheSameUrls(this Uri uri1, Uri uri2)
        {
            return string.Equals(uri1.NormalizeUrl(), uri2.NormalizeUrl(), StringComparison.Ordinal);
        }

        /// <summary>
        /// File names (lower case) that are treated as the default document of a
        /// directory and removed from the end of a URL.
        /// </summary>
        public static string[] DefaultDirectoryIndexes = new[]
        {
            "default.asp",
            "default.aspx",
            "index.htm",
            "index.html",
            "index.php"
        };

        /// <summary>Computes the normal form of <paramref name="uri"/>.</summary>
        /// <param name="uri">An absolute URI (its <c>AbsoluteUri</c> is read).</param>
        /// <returns>The normalized URL string.</returns>
        public static string NormalizeUrl(this Uri uri)
        {
            var url = urlToLower(uri);
            url = limitProtocols(url);
            url = removeDefaultDirectoryIndexes(url);
            url = removeTheFragment(url);
            url = removeDuplicateSlashes(url);
            url = addWww(url);
            url = removeFeedburnerPart(url);
            return removeTrailingSlashAndEmptyQuery(url);
        }

        /// <summary>Parses <paramref name="url"/> as a <see cref="Uri"/> and normalizes it.</summary>
        /// <param name="url">An absolute URL string accepted by <c>new Uri(string)</c>.</param>
        /// <returns>The normalized URL string.</returns>
        public static string NormalizeUrl(this string url)
        {
            return NormalizeUrl(new Uri(url));
        }

        // Cuts the URL just before the first "utm_source=" query parameter, dropping
        // that parameter and everything after it. The parameter must really start
        // there (be preceded by '?' or '&'), so e.g. "?xutm_source=" is left alone.
        private static string removeFeedburnerPart(string url)
        {
            const string marker = "utm_source=";
            var idx = url.IndexOf(marker, StringComparison.Ordinal);
            while (idx > 0)
            {
                var previous = url[idx - 1];
                if (previous == '?' || previous == '&')
                {
                    return url.Substring(0, idx - 1);
                }
                idx = url.IndexOf(marker, idx + 1, StringComparison.Ordinal);
            }
            return url;
        }

        // Prefixes a two-label host ("example.com") with "www.". Only the host is
        // touched; "://" sequences later in the URL (e.g. in a query value) are not.
        private static string addWww(string url)
        {
            var host = new Uri(url).Host;
            if (host.Split('.').Length != 2 || host.StartsWith("www.", StringComparison.Ordinal))
            {
                return url;
            }

            var schemeEnd = url.IndexOf("://", StringComparison.Ordinal);
            if (schemeEnd < 0)
            {
                return url;
            }

            // Insert in front of the host itself so that user info ("user@host") stays
            // intact; fall back to the position right after "://" if the host text
            // cannot be located in the string.
            var hostStart = url.IndexOf(host, schemeEnd + 3, StringComparison.OrdinalIgnoreCase);
            return url.Insert(hostStart < 0 ? schemeEnd + 3 : hostStart, "www.");
        }

        // Collapses every run of slashes between the end of "scheme://" and the start
        // of the query into a single slash. The fragment has already been removed
        // when this runs (see NormalizeUrl).
        private static string removeDuplicateSlashes(string url)
        {
            var schemeEnd = url.IndexOf("://", StringComparison.Ordinal);
            if (schemeEnd < 0)
            {
                return url;
            }

            var start = schemeEnd + 3;
            var queryStart = url.IndexOf('?', start);
            var end = queryStart < 0 ? url.Length : queryStart;

            var hostAndPath = url.Substring(start, end - start);
            if (!hostAndPath.Contains("//"))
            {
                return url;
            }

            // Repeat so that "///" ends up as "/" rather than "//".
            while (hostAndPath.Contains("//"))
            {
                hostAndPath = hostAndPath.Replace("//", "/");
            }
            return url.Substring(0, start) + hostAndPath + url.Substring(end);
        }

        // Treats https and http as equivalent by rewriting the scheme prefix only.
        // The URL is already lower case when this runs.
        private static string limitProtocols(string url)
        {
            const string secure = "https://";
            return url.StartsWith(secure, StringComparison.Ordinal)
                ? "http://" + url.Substring(secure.Length)
                : url;
        }

        // Drops everything from the first '#' on. Working on the string directly
        // (instead of replacing Uri.Fragment) also handles fragments whose escaped
        // form differs from the decoded text held in <url>.
        private static string removeTheFragment(string url)
        {
            var idx = url.IndexOf('#');
            return idx < 0 ? url : url.Substring(0, idx);
        }

        // Lower-cases the canonical absolute form of the URI and percent-decodes it.
        // NOTE(review): decoding the whole URL also decodes reserved characters
        // (and, per HttpUtility.UrlDecode, turns '+' into a space), so distinct URLs
        // can share a normal form - confirm this is acceptable for the use case.
        private static string urlToLower(Uri uri)
        {
            return HttpUtility.UrlDecode(uri.AbsoluteUri.ToLowerInvariant());
        }

        // Removes trailing '?' characters first, then trailing '/' characters.
        private static string removeTrailingSlashAndEmptyQuery(string url)
        {
            return url
                .TrimEnd(new[] { '?' })
                .TrimEnd(new[] { '/' });
        }

        // Removes a default document name from the end of the URL, keeping the slash
        // in front of it. The name must be a whole path segment ("/index.html"), so
        // "/xindex.html" or "/mydefault.asp" are not affected.
        private static string removeDefaultDirectoryIndexes(string url)
        {
            foreach (var index in DefaultDirectoryIndexes)
            {
                if (url.EndsWith("/" + index, StringComparison.Ordinal))
                {
                    // Remove exactly the file name; TrimEnd(char[]) would strip any
                    // trailing characters that merely occur in the name.
                    return url.Substring(0, url.Length - index.Length);
                }
            }
            return url;
        }
    }
}
With the following tests:
using NUnit.Framework;
using UrlNormalizationTest;
namespace UrlNormalization.Tests
{
    /// <summary>
    /// NUnit tests for the <c>NormalizeUrl</c> string extension. Every test normalizes
    /// two URL strings and asserts that the results are equal.
    /// </summary>
    [TestFixture]
    public class UnitTests
    {
        /// <summary>Scheme and host differing only in case.</summary>
        [Test]
        public void Test1ConvertingTheSchemeAndHostToLowercase()
        {
            AssertSameNormalForm("HTTP://www.Example.com/", "http://www.example.com/");
        }

        /// <summary>Percent-escapes written with lower-case and upper-case hex digits.</summary>
        [Test]
        public void Test2CapitalizingLettersInEscapeSequences()
        {
            AssertSameNormalForm("http://www.example.com/a%c2%b1b", "http://www.example.com/a%C2%B1b");
        }

        /// <summary>An escaped tilde (%7E) and a literal tilde.</summary>
        [Test]
        public void Test3DecodingPercentEncodedOctetsOfUnreservedCharacters()
        {
            AssertSameNormalForm("http://www.example.com/%7Eusername/", "http://www.example.com/~username/");
        }

        /// <summary>An explicit port 80 and no port.</summary>
        [Test]
        public void Test4RemovingTheDefaultPort()
        {
            AssertSameNormalForm("http://www.example.com:80/bar.html", "http://www.example.com/bar.html");
        }

        /// <summary>A path with and without a trailing "/?".</summary>
        [Test]
        public void Test5AddingTrailing()
        {
            AssertSameNormalForm("http://www.example.com/alice", "http://www.example.com/alice/?");
        }

        /// <summary>A path containing "." and ".." segments and its resolved form.</summary>
        [Test]
        public void Test6RemovingDotSegments()
        {
            AssertSameNormalForm("http://www.example.com/../a/b/../c/./d.html", "http://www.example.com/a/c/d.html");
        }

        /// <summary>"default.asp" at the site root and the bare root.</summary>
        [Test]
        public void Test7RemovingDirectoryIndex1()
        {
            AssertSameNormalForm("http://www.example.com/default.asp", "http://www.example.com/");
        }

        /// <summary>
        /// "default.asp" followed by a query string; both inputs are the same literal,
        /// so this only checks that normalization is repeatable for that URL.
        /// </summary>
        [Test]
        public void Test7RemovingDirectoryIndex2()
        {
            AssertSameNormalForm("http://www.example.com/default.asp?id=1", "http://www.example.com/default.asp?id=1");
        }

        /// <summary>"index.html" inside a folder and the bare folder.</summary>
        [Test]
        public void Test7RemovingDirectoryIndex3()
        {
            AssertSameNormalForm("http://www.example.com/a/index.html", "http://www.example.com/a/");
        }

        /// <summary>A URL with and without a fragment.</summary>
        [Test]
        public void Test8RemovingTheFragment()
        {
            AssertSameNormalForm("http://www.example.com/bar.html#section1", "http://www.example.com/bar.html");
        }

        /// <summary>The https and http forms of the same URL.</summary>
        [Test]
        public void Test9LimitingProtocols()
        {
            AssertSameNormalForm("https://www.example.com/", "http://www.example.com/");
        }

        /// <summary>A doubled slash inside the path and a single slash.</summary>
        [Test]
        public void Test10RemovingDuplicateSlashes()
        {
            AssertSameNormalForm("http://www.example.com/foo//bar.html", "http://www.example.com/foo/bar.html");
        }

        /// <summary>A host with and without the "www." prefix.</summary>
        [Test]
        public void Test11AddWww()
        {
            AssertSameNormalForm("http://example.com/", "http://www.example.com");
        }

        /// <summary>A URL with utm_* tracking parameters and the same URL without them.</summary>
        [Test]
        public void Test12RemoveFeedburnerPart()
        {
            AssertSameNormalForm(
                "http://site.net/2013/02/firefox-19-released/?utm_source=rss&utm_medium=rss&utm_campaign=firefox-19-released",
                "http://site.net/2013/02/firefox-19-released");
        }

        /// <summary>
        /// Normalizes both URL strings (first, then second) and asserts that the two
        /// normalized strings are equal.
        /// </summary>
        private static void AssertSameNormalForm(string first, string second)
        {
            var normalizedFirst = first.NormalizeUrl();
            var normalizedSecond = second.NormalizeUrl();
            Assert.AreEqual(normalizedFirst, normalizedSecond);
        }
    }
}
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!
Donate Us With