Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Check if 2 URLs are equal

Tags:

c#

asp.net

Is there a method that tests whether two URLs are equal, i.e. point to the same place? I am not talking about two URLs with different domain names pointing to the same IP address, but, for example, two URLs that point to the same .aspx page:

  • http://example.com/Products/Default.aspx?A=B&C=D&E=F

is equal to these:

  • http://example.com/Products/Default.aspx
  • http://example.com/Products/
  • ~/Products/Default.aspx
  • ~/Products/

Notes/assumptions

  1. QueryString Values are Ignored
  2. ASP.NET (Pref C#)
  3. Default.aspx is the default page

----UPDATE----

This is a very crude method that tests whether a URL matches the current URL. I tried creating a new Uri() from both the local and the check URLs, but I don't know whether that works, so I went down the string-checking avenue. The implementation of the SiteMapProvider skips this step if the URL starts with "Http", as it assumes an external URL. Since I have a SaaS framework that will always ensure relative paths (as these can be on different subdomains), it is easier to strip things down.

Any comments on optimization? I guess that, for a start, we could pass in a variable containing the current URL. I am not sure about the overhead of calling HttpContext.Current.Request.Url.LocalPath many times.

    /// <summary>
    /// Determines whether <paramref name="url"/> addresses the same page as the current request.
    /// Query strings and fragments are ignored, a folder path is treated as its
    /// <c>Default.aspx</c> page, a leading <c>~</c> is dropped, and the comparison is
    /// case-insensitive. Assumes the URL is a relative aspx page or folder path; an
    /// absolute URL is reduced to its path before comparing.
    /// </summary>
    /// <param name="url">The URL to compare with the current request, e.g. <c>~/Products/</c>.</param>
    /// <returns>
    /// <c>true</c> when both URLs resolve to the same page path; otherwise <c>false</c>
    /// (including when <paramref name="url"/> is <c>null</c>).
    /// </returns>
    public static bool CurrentURLMatch(string url)
    {
        if (url == null)
        {
            return false;
        }

        // Read the request URL once instead of walking HttpContext repeatedly.
        Uri requestUrl = HttpContext.Current.Request.Url;
        string localURL = requestUrl.LocalPath;

        if (requestUrl.Host == "localhost")
        {
            // On localhost the first path segment is taken to be the virtual directory
            // and is removed. When there is no second slash the path is left untouched
            // (the previous Substring(-1) call threw in that case).
            int nextSlash = localURL.Length > 1 ? localURL.IndexOf('/', 1) : -1;
            if (nextSlash >= 0)
            {
                localURL = localURL.Substring(nextSlash);
            }
        }

        string compareURL = StripSchemeAndHost(StripQueryAndFragment(url));
        if (compareURL.StartsWith("~", StringComparison.Ordinal))
        {
            // "~/Products/" -> "/Products/"
            compareURL = compareURL.Substring(1);
        }

        localURL = AppendDefaultPage(StripQueryAndFragment(localURL));
        compareURL = AppendDefaultPage(compareURL);

        // Case-insensitive so that "/Products/Default.aspx" matches "~/products/default.aspx".
        return string.Equals(localURL, compareURL, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Removes everything from the first '?' or '#' onwards.</summary>
    private static string StripQueryAndFragment(string value)
    {
        int cut = value.IndexOfAny(new[] { '?', '#' });
        return cut < 0 ? value : value.Substring(0, cut);
    }

    /// <summary>
    /// Reduces an absolute ("scheme://host/path") or protocol-relative ("//host/path") URL
    /// to its path, keeping the leading slash; any other value is returned unchanged.
    /// </summary>
    private static string StripSchemeAndHost(string value)
    {
        int authorityStart;
        if (value.StartsWith("//", StringComparison.Ordinal))
        {
            authorityStart = 2;
        }
        else
        {
            int schemeEnd = value.IndexOf("://", StringComparison.Ordinal);
            if (schemeEnd < 0)
            {
                return value;
            }

            authorityStart = schemeEnd + 3;
        }

        int pathStart = value.IndexOf('/', authorityStart);
        return pathStart < 0 ? "/" : value.Substring(pathStart);
    }

    /// <summary>Turns a folder path into the path of its Default.aspx page; .aspx paths are returned as is.</summary>
    private static string AppendDefaultPage(string path)
    {
        if (path.EndsWith(".aspx", StringComparison.OrdinalIgnoreCase))
        {
            return path;
        }

        if (!path.EndsWith("/", StringComparison.Ordinal))
        {
            path = String.Concat(path, "/");
        }

        return String.Concat(path, "Default.aspx");
    }
like image 712
Mark Redman Avatar asked Aug 03 '09 14:08

Mark Redman


1 Answers

For the record, here is a C# translation of the normalization rules described at http://en.wikipedia.org/wiki/URL%5Fnormalization:

using System;
using System.Web;

namespace UrlNormalizationTest
{
    /// <summary>
    /// Extension methods that reduce URLs to a normalized string form so that two URLs
    /// can be compared with a plain ordinal string comparison.
    /// </summary>
    public static class UrlNormalization
    {
        /// <summary>Returns <c>true</c> when both URL strings normalize to the same value.</summary>
        /// <param name="url1">First absolute URL.</param>
        /// <param name="url2">Second absolute URL.</param>
        public static bool AreTheSameUrls(this string url1, string url2)
        {
            return string.Equals(url1.NormalizeUrl(), url2.NormalizeUrl(), StringComparison.Ordinal);
        }

        /// <summary>Returns <c>true</c> when both URIs normalize to the same value.</summary>
        /// <param name="uri1">First absolute URI.</param>
        /// <param name="uri2">Second absolute URI.</param>
        public static bool AreTheSameUrls(this Uri uri1, Uri uri2)
        {
            return string.Equals(uri1.NormalizeUrl(), uri2.NormalizeUrl(), StringComparison.Ordinal);
        }

        /// <summary>
        /// File names treated as the default document of a directory. Entries must be
        /// lower case because the URL is lower-cased before they are matched.
        /// </summary>
        public static string[] DefaultDirectoryIndexes = new[]
            {
                "default.asp",
                "default.aspx",
                "index.htm",
                "index.html",
                "index.php"
            };

        /// <summary>
        /// Produces the normalized form of <paramref name="uri"/>: lower-cased and
        /// URL-decoded, https mapped to http, fragment removed, trailing directory index
        /// removed, duplicate path slashes collapsed, "www." added to two-label hosts,
        /// the query dropped from <c>utm_source=</c> onwards, and any trailing '?' and '/'
        /// trimmed.
        /// </summary>
        /// <param name="uri">An absolute URI.</param>
        /// <returns>The normalized URL string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="uri"/> is <c>null</c>.</exception>
        public static string NormalizeUrl(this Uri uri)
        {
            if (uri == null)
            {
                throw new ArgumentNullException("uri");
            }

            var url = urlToLower(uri);
            url = limitProtocols(url);
            // The fragment goes first so that ".../index.html#top" still ends with its
            // directory index when that step runs.
            url = removeTheFragment(url);
            url = removeDefaultDirectoryIndexes(url);
            url = removeDuplicateSlashes(url);
            url = addWww(url);
            url = removeFeedburnerPart(url);
            return removeTrailingSlashAndEmptyQuery(url);
        }

        /// <summary>Parses <paramref name="url"/> as a <see cref="Uri"/> and normalizes it.</summary>
        /// <param name="url">An absolute URL string.</param>
        /// <returns>The normalized URL string.</returns>
        public static string NormalizeUrl(this string url)
        {
            return NormalizeUrl(new Uri(url));
        }

        // Drops the query from the first "utm_source=" parameter onwards, together with
        // the '?' or '&' in front of it. Occurrences that are not a parameter name of
        // their own (e.g. "my_utm_source=") are skipped.
        private static string removeFeedburnerPart(string url)
        {
            const string marker = "utm_source=";
            var idx = url.IndexOf(marker, StringComparison.Ordinal);
            while (idx >= 0)
            {
                if (idx > 0 && (url[idx - 1] == '?' || url[idx - 1] == '&'))
                {
                    return url.Substring(0, idx - 1);
                }

                idx = url.IndexOf(marker, idx + marker.Length, StringComparison.Ordinal);
            }

            return url;
        }

        // Prefixes a two-label host ("example.com") with "www.". Only the authority is
        // touched; a "://" inside the query string is left alone.
        private static string addWww(string url)
        {
            var host = new Uri(url).Host;
            if (host.Split('.').Length != 2 || host.StartsWith("www.", StringComparison.Ordinal))
            {
                return url;
            }

            var schemeEnd = url.IndexOf("://", StringComparison.Ordinal);
            return schemeEnd < 0 ? url : url.Insert(schemeEnd + 3, "www.");
        }

        // Collapses every run of '/' in the path to a single '/'. The scheme separator,
        // the query and the fragment are not modified.
        private static string removeDuplicateSlashes(string url)
        {
            var schemeEnd = url.IndexOf("://", StringComparison.Ordinal);
            var pathStart = url.IndexOf('/', schemeEnd < 0 ? 0 : schemeEnd + 3);
            if (pathStart < 0)
            {
                return url;
            }

            var pathEnd = url.IndexOfAny(new[] { '?', '#' }, pathStart);
            if (pathEnd < 0)
            {
                pathEnd = url.Length;
            }

            var path = url.Substring(pathStart, pathEnd - pathStart);
            if (!path.Contains("//"))
            {
                return url;
            }

            // Repeat until stable: one Replace pass turns "///" into "//", not "/".
            while (path.Contains("//"))
            {
                path = path.Replace("//", "/");
            }

            return url.Substring(0, pathStart) + path + url.Substring(pathEnd);
        }

        // Maps the https scheme to http. Only the leading scheme is rewritten.
        private static string limitProtocols(string url)
        {
            const string secure = "https://";
            return url.StartsWith(secure, StringComparison.Ordinal)
                ? "http://" + url.Substring(secure.Length)
                : url;
        }

        // Cuts the URL at the first '#'.
        private static string removeTheFragment(string url)
        {
            var hash = url.IndexOf('#');
            return hash < 0 ? url : url.Substring(0, hash);
        }

        // Lower-cases the whole absolute URI (path and query included) and URL-decodes it.
        private static string urlToLower(Uri uri)
        {
            return HttpUtility.UrlDecode(uri.AbsoluteUri.ToLowerInvariant());
        }

        // Trims trailing '?' characters first, then trailing '/' characters.
        private static string removeTrailingSlashAndEmptyQuery(string url)
        {
            return url.TrimEnd('?').TrimEnd('/');
        }

        // Removes a default document name when it is the final path segment of the URL.
        // The match requires the preceding '/', so ".../myindex.html" is not affected,
        // and exactly the file name is removed (TrimEnd(char[]) strips a character set,
        // not a suffix).
        private static string removeDefaultDirectoryIndexes(string url)
        {
            foreach (var index in DefaultDirectoryIndexes)
            {
                if (url.EndsWith("/" + index, StringComparison.Ordinal))
                {
                    return url.Substring(0, url.Length - index.Length);
                }
            }

            return url;
        }
    }
}

With the following tests:

using NUnit.Framework;
using UrlNormalizationTest;

namespace UrlNormalization.Tests
{
    /// <summary>
    /// Checks <c>NormalizeUrl</c> against pairs of URLs that are expected to normalize
    /// to the same string, one test per normalization rule.
    /// </summary>
    [TestFixture]
    public class UnitTests
    {
        /// <summary>Scheme and host are compared case-insensitively.</summary>
        [Test]
        public void Test1ConvertingTheSchemeAndHostToLowercase()
        {
            var actual = "HTTP://www.Example.com/".NormalizeUrl();
            var expected = "http://www.example.com/".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>The letter case of percent-escape sequences does not matter.</summary>
        [Test]
        public void Test2CapitalizingLettersInEscapeSequences()
        {
            var actual = "http://www.example.com/a%c2%b1b".NormalizeUrl();
            var expected = "http://www.example.com/a%C2%B1b".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>A percent-encoded unreserved character equals its literal form.</summary>
        [Test]
        public void Test3DecodingPercentEncodedOctetsOfUnreservedCharacters()
        {
            var actual = "http://www.example.com/%7Eusername/".NormalizeUrl();
            var expected = "http://www.example.com/~username/".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>An explicit default port is ignored.</summary>
        [Test]
        public void Test4RemovingTheDefaultPort()
        {
            var actual = "http://www.example.com:80/bar.html".NormalizeUrl();
            var expected = "http://www.example.com/bar.html".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>A trailing slash and an empty query are ignored.</summary>
        [Test]
        public void Test5AddingTrailing()
        {
            var actual = "http://www.example.com/alice/?".NormalizeUrl();
            var expected = "http://www.example.com/alice".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>"." and ".." path segments are resolved.</summary>
        [Test]
        public void Test6RemovingDotSegments()
        {
            var actual = "http://www.example.com/../a/b/../c/./d.html".NormalizeUrl();
            var expected = "http://www.example.com/a/c/d.html".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>A directory index at the site root is removed.</summary>
        [Test]
        public void Test7RemovingDirectoryIndex1()
        {
            var actual = "http://www.example.com/default.asp".NormalizeUrl();
            var expected = "http://www.example.com/".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>A directory index followed by a query string is kept unchanged.</summary>
        [Test]
        public void Test7RemovingDirectoryIndex2()
        {
            // Compared with a literal: normalizing the same input twice and comparing
            // the two results could never fail.
            const string expected = "http://www.example.com/default.asp?id=1";
            var actual = "http://www.example.com/default.asp?id=1".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>A directory index inside a sub-folder is removed.</summary>
        [Test]
        public void Test7RemovingDirectoryIndex3()
        {
            var actual = "http://www.example.com/a/index.html".NormalizeUrl();
            var expected = "http://www.example.com/a/".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>The fragment is ignored.</summary>
        [Test]
        public void Test8RemovingTheFragment()
        {
            var actual = "http://www.example.com/bar.html#section1".NormalizeUrl();
            var expected = "http://www.example.com/bar.html".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>https and http are treated as the same protocol.</summary>
        [Test]
        public void Test9LimitingProtocols()
        {
            var actual = "https://www.example.com/".NormalizeUrl();
            var expected = "http://www.example.com/".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>Duplicate slashes in the path are collapsed.</summary>
        [Test]
        public void Test10RemovingDuplicateSlashes()
        {
            var actual = "http://www.example.com/foo//bar.html".NormalizeUrl();
            var expected = "http://www.example.com/foo/bar.html".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>A bare domain equals its "www." form.</summary>
        [Test]
        public void Test11AddWww()
        {
            var actual = "http://example.com/".NormalizeUrl();
            var expected = "http://www.example.com".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>Feed tracking parameters starting at utm_source are ignored.</summary>
        [Test]
        public void Test12RemoveFeedburnerPart()
        {
            var actual = "http://site.net/2013/02/firefox-19-released/?utm_source=rss&utm_medium=rss&utm_campaign=firefox-19-released".NormalizeUrl();
            var expected = "http://site.net/2013/02/firefox-19-released".NormalizeUrl();

            Assert.AreEqual(expected, actual);
        }
    }
}
like image 146
VahidN Avatar answered Oct 02 '22 16:10

VahidN