admin管理员组文章数量:1335430
Need a function like:
function isGoogleURL(url) { ... }
that returns true iff URL belongs to Google. No false positives; no false negatives.
Luckily there's this as a reference:
.google .google.ad .google.ae .google.af .google.ag .google.ai .google.am .google.it.ao .google.ar .google.as .google.at .google.au .google.az .google.ba .google.bd .google.be .google.bg .google.bh .google.bi .google.bn .google.bo .google.br .google.bs .google.co.bw .google.by .google.bz .google.ca .google.cd .google.cg .google.ch .google.ci .google.co.ck .google.cl .google .google.co .google.co.cr .google.cu .google.cz .google.de .google.dj .google.dk .google.dm .google.do .google.dz .google.ec .google.ee .google.eg .google.es .google.et .google.fi .google.fj .google.fm .google.fr .google.ge .google.gg .google.gh .google.gi .google.gl .google.gm .google.gp .google.gr .google.gt .google.gy .google.hk .google.hn .google.hr .google.ht .google.hu .google.co.id .google.ie .google.co.il .google.im .google.co.in .google.is .google.it .google.je .google.jm .google.jo .google.co.jp .google.co.ke .google.kh .google.ki .google.kg .google.co.kr .google.kz .google.la .google.li .google.lk .google.co.ls .google.lt .google.lu .google.lv .google.ly .google.co.ma .google.md .google.mn .google.ms .google.mt .google.mu .google.mv .google.mw .google.mx .google.my .google.co.mz .google.na .google.nf .google.ng .google.ni .google.nl .google.no .google.np .google.nr .google.nu .google.co.nz .google.om .google.pa .google.pe .google.ph .google.pk .google.pl .google.pn .google.pr .google.pt .google.py .google.qa .google.ro .google.ru .google.rw .google.sa .google.sb .google.sc .google.se .google.sg .google.sh .google.si .google.sk .google.sn .google.sm .google.st .google.sv .google.co.th .google.tj .google.tk .google.tl .google.tm .google.to .google.tr .google.tt .google.tw .google.co.tz .google.ua .google.co.ug .google.co.uk .google.uy .google.co.uz .google.vc .google.co.ve .google.vg .google.co.vi .google.vn .google.vu .google.ws .google.rs .google.co.za .google.co.zm .google.co.zw .google.cat
Any ideas how to do this elegantly?
Some Clarifications:
- I need this for a Greasemonkey script I wrote that currently only works for google.com (and should work for all the other TLDs as well). Here is the script (it modifies Google Reader to work better on wide screens).
- It should work on URLs that belong to the above domains (not blogger.com, etc.).
Need a function like:
function isGoogleURL(url) { ... }
that returns true iff URL belongs to Google. No false positives; no false negatives.
Luckily there's this as a reference:
.google.com .google.ad .google.ae .google.com.af .google.com.ag .google.com.ai .google.am .google.it.ao .google.com.ar .google.as .google.at .google.com.au .google.az .google.ba .google.com.bd .google.be .google.bg .google.com.bh .google.bi .google.com.bn .google.com.bo .google.com.br .google.bs .google.co.bw .google.com.by .google.com.bz .google.ca .google.cd .google.cg .google.ch .google.ci .google.co.ck .google.cl .google.cn .google.com.co .google.co.cr .google.com.cu .google.cz .google.de .google.dj .google.dk .google.dm .google.com.do .google.dz .google.com.ec .google.ee .google.com.eg .google.es .google.com.et .google.fi .google.com.fj .google.fm .google.fr .google.ge .google.gg .google.com.gh .google.com.gi .google.gl .google.gm .google.gp .google.gr .google.com.gt .google.gy .google.com.hk .google.hn .google.hr .google.ht .google.hu .google.co.id .google.ie .google.co.il .google.im .google.co.in .google.is .google.it .google.je .google.com.jm .google.jo .google.co.jp .google.co.ke .google.com.kh .google.ki .google.kg .google.co.kr .google.kz .google.la .google.li .google.lk .google.co.ls .google.lt .google.lu .google.lv .google.com.ly .google.co.ma .google.md .google.mn .google.ms .google.com.mt .google.mu .google.mv .google.mw .google.com.mx .google.com.my .google.co.mz .google.com.na .google.com.nf .google.com.ng .google.com.ni .google.nl .google.no .google.com.np .google.nr .google.nu .google.co.nz .google.com.om .google.com.pa .google.com.pe .google.com.ph .google.com.pk .google.pl .google.pn .google.com.pr .google.pt .google.com.py .google.com.qa .google.ro .google.ru .google.rw .google.com.sa .google.com.sb .google.sc .google.se .google.com.sg .google.sh .google.si .google.sk .google.sn .google.sm .google.st .google.com.sv .google.co.th .google.com.tj .google.tk .google.tl .google.tm .google.to .google.com.tr .google.tt .google.com.tw .google.co.tz .google.com.ua .google.co.ug .google.co.uk .google.com.uy .google.co.uz .google.com.vc .google.co.ve .google.vg .google.co.vi .google.com.vn .google.vu .google.ws .google.rs .google.co.za .google.co.zm .google.co.zw .google.cat
Any ideas how to do this elegantly?
Some Clarifications:
- I need this for a Greasemonkey script I wrote that currently only works for google.com (and should work for all the other TLDs as well). Here is the script (it modifies Google Reader to work better on wide screens).
- It should work on URLs that belong to the above domains (not blogger.com, etc.).
- No false positives and no false negatives is a lot to ask for. What if I make a page at google.rs.mydomain.com/hi? – Tom Ritter Commented Nov 3, 2008 at 20:08
- 1 I don't see google.rs.mydomain.com in the domain whitelist above. – theraccoonbear Commented Nov 3, 2008 at 20:41
9 Answers
Here is an updated version of Prestaul's answer which solves the two problems I mentioned in the comment there.
// Whitelisted Google domain suffixes, one per line, each with a leading dot.
// Only the first entries of the reference list are included in this sample;
// append the remaining domains in the same '.google.xx' form.
var GOOGLE_DOMAINS = ([
    '.google.com',
    '.google.ad',
    '.google.ae',
    '.google.com.af',
    '.google.com.ag',
    '.google.com.ai',
    '.google.am',
    '.google.it.ao',
    '.google.com.ar',
    '.google.as',
    '.google.at',
    '.google.com.au',
    '.google.az',
    '.google.ba',
    '.google.com.bd'
]).join('\n');

/**
 * Tells whether the host of an http(s) URL is one of the GOOGLE_DOMAINS
 * entries or a subdomain of one.
 *
 * Only the host is inspected: userinfo, port, path, query and fragment are
 * ignored, so a Google domain appearing anywhere else in the URL cannot
 * produce a match.
 *
 * @param {string} url - Absolute URL starting with http:// or https://.
 * @returns {boolean} true when the host ends with a whitelisted domain.
 */
function isGoogleUrl(url) {
    // scheme, optional "userinfo@", host (captured), optional ":port", then
    // either the end of the string or the first path/query/fragment
    // delimiter. A backslash counts as a delimiter because browsers treat it
    // like "/" in http(s) URLs.
    var parts = /^https?:\/\/(?:[^\/\\?#@]*@)?([^\/\\?#@:]+)(?::\d*)?(?:[\/\\?#]|$)/i.exec(url);
    if (!parts) return false;

    // The leading dot lets a bare "google.ba" match the ".google.ba" entry
    // and forces every suffix match to start on a label boundary, so
    // "notgoogle.ba" is rejected.
    var host = '.' + parts[1].toLowerCase();
    var suffixes = GOOGLE_DOMAINS.toLowerCase().split('\n');

    for (var i = 0; i < suffixes.length; i++) {
        var suffix = suffixes[i];
        // Plain string comparison: no RegExp is built from URL text, so
        // metacharacters in the URL cannot influence the match.
        if (suffix &&
                host.length >= suffix.length &&
                host.indexOf(suffix, host.length - suffix.length) !== -1) {
            return true;
        }
    }
    return false;
}

alert(isGoogleUrl('http://www.google.ba/the/page.html')); // true
alert(isGoogleUrl('http://some_mal_site.com/http://www.google.ba/')); // false
alert(isGoogleUrl('https://google.com.au/')); // true
alert(isGoogleUrl('http://www.google.com.some_mal_site.com/')); // false
alert(isGoogleUrl('http://yahoo.com/')); // false
All the domains end in either "google.xx", "google.co.xx", or "google.com.xx" except "google.it.ao" and "google.com", so if you just look at the domain, this regular expression should work for most cases (it's not perfect, but it accepts all the listed domains, and rejects most other valid domains that happen to include "google"):
/^(\w+\.)*google\.((com\.|co\.|it\.)?([a-z]{2})|com)$/i
As a function you could do something like this:
/**
 * Pattern-based check: tells whether the host of a URL looks like a Google
 * search domain ("google.com", "google.xx", "google.co.xx", "google.com.xx"
 * or "google.it.xx", optionally preceded by subdomains).
 *
 * This is a heuristic, not a whitelist: any two-letter TLD is accepted, so
 * unlisted domains such as "google.tv" or "google.com.fr" also pass.
 *
 * @param {string} url - URL or bare host name; an http(s) scheme is optional.
 * @returns {boolean} true when the host matches the Google domain pattern.
 */
function isGoogleUrl(url) {
    var host = String(url)
        .replace(/^https?:\/\//i, '')      // strip the scheme
        .replace(/[\/\\?#][\s\S]*/, '')    // strip path, query and fragment
        .replace(/^.*@/, '')               // strip "user:password@"
        .replace(/:\d*$/, '');             // strip the port
    // [\w-] rather than \w so hyphenated subdomain labels are accepted.
    return /^([\w-]+\.)*google\.((com\.|co\.|it\.)?[a-z]{2}|com)$/i.test(host);
}
You could simplify it if you use window.location.hostname:
/**
 * Tells whether the current page's host (window.location.hostname) looks
 * like a Google search domain: "google.com", "google.xx", "google.co.xx",
 * "google.com.xx" or "google.it.xx", optionally preceded by subdomains.
 *
 * This is a heuristic: any two-letter TLD is accepted, so unlisted domains
 * such as "google.tv" also pass.
 *
 * @returns {boolean} true when the current host matches the pattern.
 */
function isGoogleUrl() {
    // [\w-] rather than \w so hyphenated subdomain labels are accepted.
    return /^([\w-]+\.)*google\.((com\.|co\.|it\.)?[a-z]{2}|com)$/i.test(window.location.hostname);
}
The only way this should allow a false positive is if there is a "google.(some other TLD)". For example, "google.tv" is not on the list (it redirects to google.com), but it would pass.
Edit: As Wimmel pointed out, it also accepts invalid domains like "google.com.fr" which are not listed. It will basically accept any "google.whatever" domain name.
Do you count other Google properties as "belonging to Google"? FeedBurner, Blogger etc?
Can I ask what the purpose of this is? There may be a better way of doing what you want... and if it's reasonable I can ask internally for you.
If you don't need the test to be 100% accurate, this simple regex would do for all the domains you posted above:
"(http://)?([\w]+)?\.google\.([\w]{2,3})"
Just testing the presence of ".google." would suffice in most cases, although it could easily be fooled by adding a "google" domain in the url (not so easy though, nor quickly done).
Or just wait for google to buy their own google TLD.
I agree that you probably shouldn't do this... However, if you are going to do it (and you aren't content with the previously offered solutions that just check for a google-like pattern) then this is how I would approach it:
// Whitelisted Google domain suffixes, one per line, each with a leading dot.
// Only the first entries of the reference list are included in this sample;
// append the remaining domains in the same '.google.xx' form.
var GOOGLE_DOMAINS = ([
    '.google.com',
    '.google.ad',
    '.google.ae',
    '.google.com.af',
    '.google.com.ag',
    '.google.com.ai',
    '.google.am',
    '.google.it.ao',
    '.google.com.ar',
    '.google.as',
    '.google.at',
    '.google.com.au',
    '.google.az',
    '.google.ba',
    '.google.com.bd'
]).join('\n');

/**
 * Tells whether the host of an http(s) URL is one of the GOOGLE_DOMAINS
 * entries or a subdomain of one.
 *
 * The check looks at the URL's host only, so a Google domain that merely
 * appears in the path, query, fragment or userinfo does not count.
 *
 * @param {string} url - Absolute URL starting with http:// or https://.
 * @returns {boolean} true when the host ends with a whitelisted domain.
 */
function isGoogleUrl(url) {
    // Capture the host: skip the scheme and an optional "userinfo@", stop at
    // the port or at the first path/query/fragment delimiter. A backslash is
    // treated like "/" because browsers do the same for http(s) URLs.
    var found = /^https?:\/\/(?:[^\/\\?#@]*@)?([^\/\\?#@:]+)(?::\d*)?(?:[\/\\?#]|$)/i.exec(url);
    if (found === null) return false;

    // Prefix a dot so that "google.ba" itself matches ".google.ba" and so a
    // suffix can only match on a whole-label boundary.
    var dottedHost = '.' + found[1].toLowerCase();
    var entries = GOOGLE_DOMAINS.toLowerCase().split('\n');

    for (var n = 0; n < entries.length; n++) {
        var entry = entries[n];
        if (!entry || entry.length > dottedHost.length) continue;
        // Literal suffix comparison; no RegExp is built from URL text.
        if (dottedHost.slice(dottedHost.length - entry.length) === entry) {
            return true;
        }
    }
    return false;
}
This creates a regex based on the domain of your URL and uses it to test the list of domains.
Note: The GOOGLE_DOMAINS variable is just a string that holds the contents returned from the URL you posted. There is no way for you to retrieve that string via AJAX or an iframe because you cannot make such a request across domains. You'll have to hard-code it or make a request server-side to retrieve that list.
A regular expression may be what you need. An example is:
<script>
// Replaces the content of the element with id "a" by "true" or "false",
// depending on whether its current content contains "google.com".
var elem = document.getElementById("a");
// Optional "http://" and "www." prefixes followed by the literal "google.com".
var regex = new RegExp("(http://)?(www\\.)?google\\.com");
elem.innerHTML = regex.test(elem.innerHTML);
</script>
This would get the content of a span element "a", and would change it to "true" if it is google.com, and "false" otherwise. Note that it doesn't consider all the other URLs (although the regex could easily be modified to do so), and "pages.google.com", for example, wouldn't match.
Also, your URLs all have a "." before them (".google.com" instead of "google.com"). Is there a reason for this, or is it just a mistake?
You could use a regular expression like....
^https?://[-A-Za-z0-9\.]+(\.google\.com|\.google\.ad|\.google\.ae|\.google\.com\.af|\.google\.com\.ag|\.google\.com\.ai|\.google\.am|\.google\.it\.ao|\.google\.com\.ar|\.google\.as|\.google\.at|\.google\.com\.au|\.google\.az|\.google\.ba|\.google\.com\.bd|\.google\.be|\.google\.bg|\.google\.com\.bh|\.google\.bi|\.google\.com\.bn|\.google\.com\.bo|\.google\.com\.br|\.google\.bs|\.google\.co\.bw|\.google\.com\.by|\.google\.com\.bz|\.google\.ca|\.google\.cd|\.google\.cg|\.google\.ch|\.google\.ci|\.google\.co\.ck|\.google\.cl|\.google\.cn|\.google\.com\.co|\.google\.co\.cr|\.google\.com\.cu|\.google\.cz|\.google\.de|\.google\.dj|\.google\.dk|\.google\.dm|\.google\.com\.do|\.google\.dz|\.google\.com\.ec|\.google\.ee|\.google\.com\.eg|\.google\.es|\.google\.com\.et|\.google\.fi|\.google\.com\.fj|\.google\.fm|\.google\.fr|\.google\.ge|\.google\.gg|\.google\.com\.gh|\.google\.com\.gi|\.google\.gl|\.google\.gm|\.google\.gp|\.google\.gr|\.google\.com\.gt|\.google\.gy|\.google\.com\.hk|\.google\.hn|\.google\.hr|\.google\.ht|\.google\.hu|\.google\.co\.id|\.google\.ie|\.google\.co\.il|\.google\.im|\.google\.co\.in|\.google\.is|\.google\.it|\.google\.je|\.google\.com\.jm|\.google\.jo|\.google\.co\.jp|\.google\.co\.ke|\.google\.com\.kh|\.google\.ki|\.google\.kg|\.google\.co\.kr|\.google\.kz|\.google\.la|\.google\.li|\.google\.lk|\.google\.co\.ls|\.google\.lt|\.google\.lu|\.google\.lv|\.google\.com\.ly|\.google\.co\.ma|\.google\.md|\.google\.mn|\.google\.ms|\.google\.com\.mt|\.google\.mu|\.google\.mv|\.google\.mw|\.google\.com\.mx|\.google\.com\.my|\.google\.co\.mz|\.google\.com\.na|\.google\.com\.nf|\.google\.com\.ng|\.google\.com\.ni|\.google\.nl|\.google\.no|\.google\.com\.np|\.google\.nr|\.google\.nu|\.google\.co\.nz|\.google\.com\.om|\.google\.com\.pa|\.google\.com\.pe|\.google\.com\.ph|\.google\.com\.pk|\.google\.pl|\.google\.pn|\.google\.com\.pr|\.google\.pt|\.google\.com\.py|\.google\.com\.qa|\.google\.ro|\.google\.ru|\.google\.rw|\.google\.com\.sa|\.google\.com\.sb|\.google\.sc|\.google\.se|\.google\.com\.sg|\.google\.sh|\.google\.si|\.google\.sk|\.google\.sn|\.google\.sm|\.google\.st|\.google\.com\.sv|\.google\.co\.th|\.google\.com\.tj|\.google\.tk|\.google\.tl|\.google\.tm|\.google\.to|\.google\.com\.tr|\.google\.tt|\.google\.com\.tw|\.google\.co\.tz|\.google\.com\.ua|\.google\.co\.ug|\.google\.co\.uk|\.google\.com\.uy|\.google\.co\.uz|\.google\.com\.vc|\.google\.co\.ve|\.google\.vg|\.google\.co\.vi|\.google\.com\.vn|\.google\.vu|\.google\.ws|\.google\.rs|\.google\.co\.za|\.google\.co\.zm|\.google\.co\.zw|\.google\.cat)
and I'd imagine generating that in JavaScript (or whatever language you choose) from an array or some other data set would be relatively easy.
I wouldn't do this client-side.
The list of Google domains doesn't change so frequently, so you could store a list server-side and then dynamically generate the .js to check it.
Without a regex to individually match each and every TLD, there isn't really an 'elegant way of doing it'.
本文标签: greasemonkeyJavaScript function to match only Google URLsStack Overflow
版权声明:本文标题:greasemonkey - JavaScript function to match only Google URLs - Stack Overflow 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1742247845a2440189.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论