How can I remove http(s), www(n), and public suffixes from a URL in Ruby?
input => expected output
https://mail.google.com.au
=>mail.google
http://www.google.in
=>google
https://www9.calendar.google.co.uk
=>calendar.google
https://www12.stage.calendar.google.co.uk
=>stage.calendar.google
www.blog.botreetechnologies.com
=>blog.botreetechnologies
update
require 'uri'
require 'public_suffix'

# Attempt 1: URI alone only yields the full host, "www" and suffix included.
t = URI.parse 'http://www.google.com'
t.host #=> "www.google.com"

URI.split 'http://www.google.com'
#=> ["http", nil, "www.google.com", nil, nil, "", nil, nil, nil]

# Attempt 2: PublicSuffix separates the suffix, but #sld alone drops the
# subdomain labels that should be kept (e.g. "mail").
uri = URI.parse("http://www.google.co.uk")
#=> #<URI::HTTP http://www.google.co.uk>
domain = PublicSuffix.parse(uri.host)
#=> #<PublicSuffix::Domain:0x00000003c538e0 @sld="google", @tld="co.uk", @trd="www">
domain.sld #=> "google"

uri = URI.parse("http://www.mail.google.co.uk")
#=> #<URI::HTTP http://www.mail.google.co.uk>
domain = PublicSuffix.parse(uri.host)
#=> #<PublicSuffix::Domain:0x00000002e97bc0 @sld="google", @tld="co.uk", @trd="www.mail">
domain.sld #=> "google"

# Attempt 3: stripping a leading "www." by regex leaves the public suffix.
%w[http://www.example.com/page http://blog.example.com/page].each do |u|
  puts URI.parse(u).host.sub(/^www\./, '')
end
# example.com
# blog.example.com

# Attempt 4: a string without a scheme parses as a generic URI with no host.
uri = URI.parse("www.pinkpoodles.com.au")
#=> #<URI::Generic www.pinkpoodles.com.au>
uri.host #=> nil
I can't think of a one-liner, but this works:
require 'uri'
require 'public_suffix'

# Reduces a URL (or bare host name) to its host with the scheme, a leading
# "www"/"wwwN" label, and the public suffix removed.
#
# @param uri [String, URI] a URL such as "https://www9.calendar.google.co.uk",
#   or a bare host name such as "www.blog.example.com"
# @return [String] the remaining subdomain labels followed by the
#   second-level domain, joined with "."
def simple_host(uri)
  uri = URI(uri)
  # A string without a scheme parses with a nil host, so re-parse it with a
  # placeholder scheme to get the host populated.
  uri = URI("http://#{uri}") unless uri.scheme

  domain = PublicSuffix.parse(uri.host)
  trd = domain.trd
  if trd
    trd = trd.split('.')
    # Drop only an exact "www" or "www<digits>" label; a label that merely
    # begins with "www" (e.g. "wwwizard") is a real subdomain and is kept.
    trd.shift if trd.first.match?(/\Awww\d*\z/)
  end

  # trd is nil when the host has no subdomain; the splat then adds nothing.
  [*trd, domain.sld].join('.')
end

simple_host('https://mail.google.com.au')                #=> "mail.google"
simple_host('http://www.google.in')                      #=> "google"
simple_host('https://www9.calendar.google.co.uk')        #=> "calendar.google"
simple_host('https://www12.stage.calendar.google.co.uk') #=> "stage.calendar.google"
simple_host('www.blog.botreetechnologies.com')           #=> "blog.botreetechnologies"
Comments
Post a Comment