Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

JavaScript function to match only Google URLs

Need a function like:

function isGoogleURL(url) { ... }

that returns true iff URL belongs to Google. No false positives; no false negatives.

Luckily there's this as a reference:

.google.com .google.ad .google.ae .google.com.af .google.com.ag .google.com.ai .google.am .google.it.ao .google.com.ar .google.as .google.at .google.com.au .google.az .google.ba .google.com.bd .google.be .google.bg .google.com.bh .google.bi .google.com.bn .google.com.bo .google.com.br .google.bs .google.co.bw .google.com.by .google.com.bz .google.ca .google.cd .google.cg .google.ch .google.ci .google.co.ck .google.cl .google.cn .google.com.co .google.co.cr .google.com.cu .google.cz .google.de .google.dj .google.dk .google.dm .google.com.do .google.dz .google.com.ec .google.ee .google.com.eg .google.es .google.com.et .google.fi .google.com.fj .google.fm .google.fr .google.ge .google.gg .google.com.gh .google.com.gi .google.gl .google.gm .google.gp .google.gr .google.com.gt .google.gy .google.com.hk .google.hn .google.hr .google.ht .google.hu .google.co.id .google.ie .google.co.il .google.im .google.co.in .google.is .google.it .google.je .google.com.jm .google.jo .google.co.jp .google.co.ke .google.com.kh .google.ki .google.kg .google.co.kr .google.kz .google.la .google.li .google.lk .google.co.ls .google.lt .google.lu .google.lv .google.com.ly .google.co.ma .google.md .google.mn .google.ms .google.com.mt .google.mu .google.mv .google.mw .google.com.mx .google.com.my .google.co.mz .google.com.na .google.com.nf .google.com.ng .google.com.ni .google.nl .google.no .google.com.np .google.nr .google.nu .google.co.nz .google.com.om .google.com.pa .google.com.pe .google.com.ph .google.com.pk .google.pl .google.pn .google.com.pr .google.pt .google.com.py .google.com.qa .google.ro .google.ru .google.rw .google.com.sa .google.com.sb .google.sc .google.se .google.com.sg .google.sh .google.si .google.sk .google.sn .google.sm .google.st .google.com.sv .google.co.th .google.com.tj .google.tk .google.tl .google.tm .google.to .google.com.tr .google.tt .google.com.tw .google.co.tz .google.com.ua .google.co.ug .google.co.uk .google.com.uy .google.co.uz .google.com.vc .google.co.ve .google.vg .google.co.vi .google.com.vn .google.vu .google.ws .google.rs .google.co.za .google.co.zm .google.co.zw .google.cat

Any ideas how to do this elegantly?

Some Clarifications:

  • I need this for a greasemonkey script I wrote that currently only works for google.com (and should work for all other TLDs as well). Here is the script (it modifies Google Reader to work on wide screens better).
  • It should work on URLs that belong to the above domains (not blogger.com, etc.).
like image 849
Assaf Lavie Avatar asked Apr 08 '26 05:04

Assaf Lavie


1 Answers

Here is an updated version of Prestaul's answer which solves the two problems I mentioned in the comment there.

var GOOGLE_DOMAINS = ([
    '.google.com',
    '.google.ad',
    '.google.ae',
    '.google.com.af',
    '.google.com.ag',
    '.google.com.ai',
    '.google.am',
    '.google.it.ao',
    '.google.com.ar',
    '.google.as',
    '.google.at',
    '.google.com.au',
    '.google.az',
    '.google.ba',
    '.google.com.bd'
]).join('\n');

function isGoogleUrl(url) {
    // get the 2nd level domain from the url
    var domain = /^https?:\/\/[^\///]*(google\.[^\/\\]+)\//i.exec(url);
    if(!domain) return false;

    domain = '.'+domain[1];
    // create a regex to check to see if the domain is supported
    var re = new RegExp('^' + domain.replace(/\./g, '\\.') + '$', 'mi');
    return re.test(GOOGLE_DOMAINS);
}

alert(isGoogleUrl('http://www.google.ba/the/page.html')); // true
alert(isGoogleUrl('http://some_mal_site.com/http://www.google.ba/')); // false
alert(isGoogleUrl('https://google.com.au/')); // true
alert(isGoogleUrl('http://www.google.com.some_mal_site.com/')); // false
alert(isGoogleUrl('http://yahoo.com/')); // false
like image 50
wimh Avatar answered Apr 10 '26 18:04

wimh