From 7cb29d78d4aada80c990b710f08392cd191eae5b Mon Sep 17 00:00:00 2001 From: Peter Bowen Date: Sun, 6 Nov 2016 22:41:38 +0000 Subject: [PATCH 1/3] Definition update (2016-11-06) --- data/list.txt | 114 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 109 insertions(+), 5 deletions(-) diff --git a/data/list.txt b/data/list.txt index 497f7b1f..41edbbbc 100644 --- a/data/list.txt +++ b/data/list.txt @@ -704,7 +704,9 @@ org.cw cx gov.cx -// cy : https://en.wikipedia.org/wiki/.cy +// cy : http://www.nic.cy/ +// Submitted by registry Panayiotou Fotia +cy ac.cy biz.cy com.cy @@ -4367,9 +4369,17 @@ edu.my mil.my name.my -// mz : http://www.gobin.info/domainname/mz-template.doc -*.mz -!teledata.mz +// mz : http://www.uem.mz/ +// Submitted by registry +mz +ac.mz +adv.mz +co.mz +edu.mz +gov.mz +mil.mz +net.mz +org.mz // na : http://www.na-nic.com.na/ // http://www.info.na/domain/ @@ -10644,6 +10654,10 @@ beep.pl *.compute.estate *.alces.network +// alwaysdata : https://www.alwaysdata.com +// Submitted by Cyril +*.alwaysdata.net + // Amazon CloudFront : https://aws.amazon.com/cloudfront/ // Submitted by Donavan Miller cloudfront.net @@ -10825,6 +10839,20 @@ r.cdn77.net rsc.cdn77.org ssl.origin.cdn77-secure.org +// Cloud DNS Ltd : http://www.cloudns.net +// Submitted by Aleksander Hristov +cloudns.asia +cloudns.biz +cloudns.club +cloudns.cc +cloudns.eu +cloudns.in +cloudns.info +cloudns.org +cloudns.pro +cloudns.pw +cloudns.us + // CoDNS B.V. 
co.nl co.no @@ -11280,6 +11308,16 @@ fbxos.fr freebox-os.fr freeboxos.fr +// Fusion Intranet : https://www.fusion-intranet.com +// Submitted by Matthias Burtscher +myfusion.cloud + +// Futureweb OG : http://www.futureweb.at +// Submitted by Andreas Schnederle-Wagner +futuremailing.at +*.ex.ortsinfo.at +*.kunden.ortsinfo.at + // GDS : https://www.gov.uk/service-manual/operations/operating-servicegovuk-subdomains // Submitted by David Illsley service.gov.uk @@ -11391,6 +11429,7 @@ codespot.com googleapis.com googlecode.com pagespeedmobilizer.com +publishproxy.com withgoogle.com withyoutube.com @@ -11418,6 +11457,36 @@ iki.fi biz.at info.at +// Interlegis : http://www.interlegis.leg.br +// Submitted by Gabriel Ferreira +ac.leg.br +al.leg.br +am.leg.br +ap.leg.br +ba.leg.br +ce.leg.br +df.leg.br +es.leg.br +go.leg.br +ma.leg.br +mg.leg.br +ms.leg.br +mt.leg.br +pa.leg.br +pb.leg.br +pe.leg.br +pi.leg.br +pr.leg.br +rj.leg.br +rn.leg.br +ro.leg.br +rr.leg.br +rs.leg.br +sc.leg.br +se.leg.br +sp.leg.br +to.leg.br + // Joyent : https://www.joyent.com/ // Submitted by Brian Bennett *.triton.zone @@ -11427,6 +11496,14 @@ info.at // Submitted by Stefan Keim js.org +// Keyweb AG : https://www.keyweb.de +// Submitted by Martin Dannehl +keymachine.de + +// KnightPoint Systems, LLC : http://www.knightpoint.com/ +// Submitted by Roy Keene +knightpoint.systems + // .KRD : http://nic.krd/data/krd/Registration%20Policy.pdf co.krd edu.krd @@ -11566,6 +11643,10 @@ nyc.mn // Submitted by Eddie Jones nid.io +// OpenCraft GmbH : http://opencraft.com/ +// Submitted by Sven Marnach +opencraft.hosting + // Opera Software, A.S.A. // Submitted by Yngve Pettersen operaunite.com @@ -11646,6 +11727,12 @@ rhcloud.com // Submitted by Chris Kastorff hzc.io +// Revitalised Limited : http://www.revitalised.co.uk +// Submitted by Jack Price +wellbeingzone.eu +ptplus.fit +wellbeingzone.co.uk + // Sandstorm Development Group, Inc. 
: https://sandcats.io/ // Submitted by Asheesh Laroia sandcats.io @@ -11667,9 +11754,11 @@ myshopblocks.com // SinaAppEngine : http://sae.sina.com.cn/ // Submitted by SinaAppEngine +1kapp.com +appchizi.com +applinzi.com sinaapp.com vipsinaapp.com -1kapp.com // Skyhat : http://www.skyhat.io // Submitted by Shante Adam @@ -11683,6 +11772,11 @@ static.land dev.static.land sites.static.land +// SourceLair PC : https://www.sourcelair.com +// Submitted by Antonis Kalipetis +apps.lair.io +*.stolos.io + // SpaceKit : https://www.spacekit.io/ // Submitted by Reza Akhavan spacekit.io @@ -11707,6 +11801,10 @@ i234.me myds.me synology.me +// TAIFUN Software AG : http://taifun-software.de +// Submitted by Bjoern Henke +taifun-dns.de + // TASK geographical domains (www.task.gda.pl/uslugi/dns) gda.pl gdansk.pl @@ -11719,6 +11817,12 @@ sopot.pl bloxcms.com townnews-staging.com +// TransIP : htts://www.transip.nl +// Submitted by Rory Breuk +*.transurl.be +*.transurl.eu +*.transurl.nl + // TuxFamily : http://tuxfamily.org // Submitted by TuxFamily administrators tuxfamily.org From 21d02a1192827f213b9ed8c3e60472a3731208c6 Mon Sep 17 00:00:00 2001 From: Peter Bowen Date: Sat, 12 Aug 2017 19:58:05 -0700 Subject: [PATCH 2/3] Sync list --- data/list.txt | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/data/list.txt b/data/list.txt index 019f52bf..1258bdd8 100644 --- a/data/list.txt +++ b/data/list.txt @@ -6593,6 +6593,9 @@ yt // xn--54b7fta0cc ("Bangla", Bangla) : BD বাংলা +// xn--90ae ("bg", Bulgarian) : BG +бг + // xn--90ais ("bel", Belarusian/Russian Cyrillic) : BY // Operated by .by registry бел @@ -6628,6 +6631,34 @@ yt // https://www2.hkirc.hk/register/rules.jsp 香港 +// xn--2scrj9c ("Bharat", Kannada) : IN +// India +ಭಾರತ + +// xn--3hcrj9c ("Bharat", Oriya) : IN +// India +ଭାରତ + +// xn--45br5cyl ("Bharatam", Assamese) : IN +// India +ভাৰত + +// xn--h2breg3eve ("Bharatam", Sanskrit) : IN +// India +भारतम् + +// xn--h2brj9c8c 
("Bharot", Santali) : IN +// India +भारोत + +// xn--mgbgu82a ("Bharat", Sindhi) : IN +// India +ڀارت + +// xn--rvc1e0am3e ("Bharatam", Malayalam) : IN +// India +ഭാരതം + // xn--h2brj9c ("Bharat", Devanagari) : IN // India भारत @@ -10657,6 +10688,10 @@ betainabox.com // Submitted by Nathan O'Sullivan bnr.la +// Boomla : https://boomla.com +// Submitted by Tibor Halter +boomla.net + // Boxfuse : https://boxfuse.com // Submitted by Axel Fontaine boxfuse.io @@ -11168,6 +11203,26 @@ ddnss.org definima.net definima.io +// Dynu.com : https://www.dynu.com/ +// Submitted by Sue Ye +ddnsfree.com +ddnsgeek.com +giize.com +gleeze.com +kozow.com +loseyourip.com +ooguy.com +theworkpc.com +casacam.net +dynu.net +accesscam.org +camdvr.org +freeddns.org +mywire.org +webredirect.org +myddns.rocks +blogsite.xyz + // dynv6 : https://dynv6.com // Submitted by Dominik Menke dynv6.net @@ -11249,9 +11304,11 @@ us.eu.org eu-1.evennode.com eu-2.evennode.com eu-3.evennode.com +eu-4.evennode.com us-1.evennode.com us-2.evennode.com us-3.evennode.com +us-4.evennode.com // eDirect Corp. : https://hosting.url.com.tw/ // Submitted by C.S. chang @@ -11604,6 +11661,10 @@ git-repos.de lcube-server.de svn-repos.de +// LiquidNet Ltd : http://www.liquidnetlimited.com/ +// Submitted by Victor Velchev +we.bs + // Lukanet Ltd : https://lukanet.com // Submitted by Anton Avramov barsy.bg @@ -11646,6 +11707,17 @@ cloudapp.net // Submitted by glob bmoattachments.org +// MSK-IX : https://www.msk-ix.ru/ +// Submitted by Khannanov Roman +net.ru +org.ru +pp.ru + +// Netlify : https://www.netlify.com +// Submitted by Jessica Parsons +bitballoon.com +netlify.com + // Neustar Inc. // Submitted by Trung Tran 4u.com @@ -11751,6 +11823,10 @@ sytes.net webhop.me zapto.org +// NodeArt : https://nodeart.io +// Submitted by Konstantin Nosov +stage.nodeart.io + // Nodum B.V. 
: https://nodum.io/ // Submitted by Wietse Wind nodum.co @@ -11760,6 +11836,46 @@ nodum.io // Submitted by Matthew Brown nyc.mn +// NymNom : https://nymnom.com/ +// Submitted by Dave McCormack +nom.ae +nom.ai +nom.al +nym.by +nym.bz +nom.cl +nom.gd +nom.gl +nym.gr +nom.gt +nom.hn +nom.im +nym.kz +nym.la +nom.li +nym.li +nym.lt +nym.lu +nym.me +nom.mk +nym.mx +nom.nu +nym.nz +nym.pe +nym.pt +nom.pw +nom.qa +nom.rs +nom.si +nym.sk +nym.su +nym.sx +nym.tw +nom.ug +nom.uy +nom.vc +nom.vg + // Octopodal Solutions, LLC. : https://ulterius.io/ // Submitted by Andrew Sampson cya.gg @@ -11953,6 +12069,10 @@ stackspace.space // Submitted by Philip Hutchins storj.farm +// Sub 6 Limited: http://www.sub6.com +// Submitted by Dan Miller +temp-dns.com + // Synology, Inc. : https://www.synology.com/ // Submitted by Rony Weng diskstation.me From f64a77f85e10a2a018fb3c1d8fdad91eeb1917ba Mon Sep 17 00:00:00 2001 From: Peter Bowen Date: Sat, 12 Aug 2017 20:11:07 -0700 Subject: [PATCH 3/3] Speed up rule processing with a mixed hash/tree model --- lib/public_suffix.rb | 24 +++----- lib/public_suffix/domain.rb | 8 +-- lib/public_suffix/list.rb | 112 ++++++------------------------------ lib/public_suffix/rules.rb | 75 ++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 113 deletions(-) create mode 100644 lib/public_suffix/rules.rb diff --git a/lib/public_suffix.rb b/lib/public_suffix.rb index 7b79dd3b..5dd11640 100644 --- a/lib/public_suffix.rb +++ b/lib/public_suffix.rb @@ -7,7 +7,7 @@ require_relative "public_suffix/domain" require_relative "public_suffix/version" require_relative "public_suffix/errors" -require_relative "public_suffix/rule" +require_relative "public_suffix/rules" require_relative "public_suffix/list" # PublicSuffix is a Ruby domain name parser based on the Public Suffix List. @@ -64,19 +64,16 @@ module PublicSuffix # If domain is not a valid domain. 
# @raise [PublicSuffix::DomainNotAllowed] # If a rule for +domain+ is found, but the rule doesn't allow +domain+. - def self.parse(name, list: List.default, default_rule: list.default_rule, ignore_private: false) + def self.parse(name, list: List.default, ignore_private: false) what = normalize(name) raise what if what.is_a?(DomainInvalid) - rule = list.find(what, default: default_rule, ignore_private: ignore_private) + rule = list.find(what, ignore_private: ignore_private) # rubocop:disable Style/IfUnlessModifier if rule.nil? raise DomainInvalid, "`#{what}` is not a valid domain" end - if rule.decompose(what).last.nil? - raise DomainNotAllowed, "`#{what}` is not allowed according to Registry policy" - end # rubocop:enable Style/IfUnlessModifier decompose(what, rule) @@ -119,13 +116,8 @@ def self.parse(name, list: List.default, default_rule: list.default_rule, ignore # @param [String, #to_s] name The domain name or fully qualified domain name to validate. # @param [Boolean] ignore_private # @return [Boolean] - def self.valid?(name, list: List.default, default_rule: list.default_rule, ignore_private: false) - what = normalize(name) - return false if what.is_a?(DomainInvalid) - - rule = list.find(what, default: default_rule, ignore_private: ignore_private) - - !rule.nil? && !rule.decompose(what).last.nil? + def self.valid?(name, list: List.default, ignore_private: false) + !normalize(name).is_a?(DomainInvalid) end # Attempt to parse the name and returns the domain, if valid. @@ -146,13 +138,13 @@ def self.domain(name, **options) # private def self.decompose(name, rule) - left, right = rule.decompose(name) + rule_len = rule.split(DOT).length + parts = name.split(DOT) - parts = left.split(DOT) # If we have 0 parts left, there is just a tld and no domain or subdomain # If we have 1 part left, there is just a tld, domain and not subdomain # If we have 2 parts left, the last part is the domain, the other parts (combined) are the subdomain - tld = right + tld = rule.empty? 
? nil : parts.pop(rule_len).join(DOT) sld = parts.empty? ? nil : parts.pop trd = parts.empty? ? nil : parts.join(DOT) diff --git a/lib/public_suffix/domain.rb b/lib/public_suffix/domain.rb index 4c4edd44..88e3107f 100644 --- a/lib/public_suffix/domain.rb +++ b/lib/public_suffix/domain.rb @@ -133,7 +133,7 @@ def name # # @return [String] def domain - [@sld, @tld].join(DOT) if domain? + [@sld, @tld].compact.join(DOT) if domain? end # Returns a subdomain-like representation of this object @@ -165,7 +165,7 @@ def domain # # @return [String] def subdomain - [@trd, @sld, @tld].join(DOT) if subdomain? + [@trd, @sld, @tld].compact.join(DOT) if subdomain? end # Checks whether self looks like a domain. @@ -196,7 +196,7 @@ def subdomain # # @return [Boolean] def domain? - !(@tld.nil? || @sld.nil?) + !@sld.nil? end # Checks whether self looks like a subdomain. @@ -227,7 +227,7 @@ def domain? # # @return [Boolean] def subdomain? - !(@tld.nil? || @sld.nil? || @trd.nil?) + !(@sld.nil? || @trd.nil?) end end diff --git a/lib/public_suffix/list.rb b/lib/public_suffix/list.rb index 04e53a98..3bd69522 100644 --- a/lib/public_suffix/list.rb +++ b/lib/public_suffix/list.rb @@ -69,6 +69,7 @@ def self.default=(value) def self.parse(input, private_domains: true) comment_token = "//".freeze private_token = "===BEGIN PRIVATE DOMAINS===".freeze + space_re = /\p{Space}/ section = nil # 1 == ICANN, 2 == PRIVATE new do |list| @@ -90,7 +91,8 @@ def self.parse(input, private_domains: true) next else - list.add(Rule.factory(line, private: section == 2)) + rule = line.split(space_re).first + list.add(rule, private: section == 2) end end @@ -103,41 +105,23 @@ def self.parse(input, private_domains: true) # @yield [self] Yields on self. # @yieldparam [PublicSuffix::List] self The newly created instance. def initialize - @rules = {} + @rules = Rules.new + add('*', private: false) yield(self) if block_given? end - - # Checks whether two lists are equal. 
- # - # List one is equal to two, if two is an instance of - # {PublicSuffix::List} and each +PublicSuffix::Rule::*+ - # in list one is available in list two, in the same order. - # - # @param other [PublicSuffix::List] the List to compare - # @return [Boolean] - def ==(other) - return false unless other.is_a?(List) - equal?(other) || @rules == other.rules - end - alias eql? == - - # Iterates each rule in the list. - def each(&block) - Enumerator.new do |y| - @rules.each do |key, node| - y << entry_to_rule(node, key) - end - end.each(&block) - end - - # Adds the given object to the list and optionally refreshes the rule index. # # @param rule [PublicSuffix::Rule::*] the rule to add to the list # @return [self] - def add(rule) - @rules[rule.value] = rule_to_entry(rule) + def add(rule, private: false) + exception = false + if rule[0] == BANG + exception = true + rule = rule[1..-1] + end + lbls = rule.split(DOT).reverse + @rules.add(lbls, exception, private) self end alias << add @@ -160,7 +144,7 @@ def empty? # # @return [self] def clear - @rules.clear + @rules = Rules.new self end @@ -169,77 +153,19 @@ def clear # @param name [#to_s] the hostname # @param default [PublicSuffix::Rule::*] the default rule to return in case no rule matches # @return [PublicSuffix::Rule::*] - def find(name, default: default_rule, **options) - rule = select(name, **options).inject do |l, r| - return r if r.class == Rule::Exception - l.length > r.length ? l : r - end - rule || default + def find(name, ignore_private: false) + lbls = name.split(DOT).reverse + r = @rules.get_regdom(lbls, !ignore_private) + r.reverse[1..-1].join(DOT) end - # Selects all the rules matching given hostame. - # - # If `ignore_private` is set to true, the algorithm will skip the rules that are flagged as - # private domain. Note that the rules will still be part of the loop. 
- # If you frequently need to access lists ignoring the private domains, - # you should create a list that doesn't include these domains setting the - # `private_domains: false` option when calling {.parse}. - # - # Note that this method is currently private, as you should not rely on it. Instead, - # the public interface is {#find}. The current internal algorithm allows to return all - # matching rules, but different data structures may not be able to do it, and instead would - # return only the match. For this reason, you should rely on {#find}. - # - # @param name [#to_s] the hostname - # @param ignore_private [Boolean] - # @return [Array] - def select(name, ignore_private: false) - name = name.to_s - - parts = name.split(DOT).reverse! - index = 0 - query = parts[index] - rules = [] - - loop do - match = @rules[query] - if !match.nil? && (ignore_private == false || match.private == false) - rules << entry_to_rule(match, query) - end - - index += 1 - break if index >= parts.size - query = parts[index] + DOT + query - end - - rules - end - private :select - # Gets the default rule. # # @see PublicSuffix::Rule.default_rule # # @return [PublicSuffix::Rule::*] def default_rule - PublicSuffix::Rule.default - end - - - protected - - attr_reader :rules - - - private - - def entry_to_rule(entry, value) - entry.type.new(value: value, length: entry.length, private: entry.private) - end - - def rule_to_entry(rule) - Rule::Entry.new(rule.class, rule.length, rule.private) + '*' end - end end diff --git a/lib/public_suffix/rules.rb b/lib/public_suffix/rules.rb new file mode 100644 index 00000000..b8ae417a --- /dev/null +++ b/lib/public_suffix/rules.rb @@ -0,0 +1,75 @@ +# = Public Suffix +# +# Domain name parser based on the Public Suffix List. +# +# Copyright (c) 2009-2017 Simone Carletti + +module PublicSuffix + + # A Rule is a special object which holds a single definition + # of the Public Suffix List. 
+ # + # The Rules class below is not a single rule: each instance is one node + # of a label tree. @children maps a label to a child node, @terminus + # marks that a rule ends at this node, and @priv / @exception flag it. + # + # Labels are passed in reverse order (TLD first). #add consumes the + # array it is given; #get_regdom copies its input and tries an exact + # label match before falling back to the "*" wildcard child. + # + class Rules + def initialize + @children = {} + @terminus = false + @priv = false + @exception = false + end + + def empty? + @children.empty? && !@terminus + end + + def size + sz = @terminus ? 1 : 0 + @children.each{|k,v|sz += v.size} + sz + end + + def add(x, excpt, priv) + lbl = x.shift + if lbl.nil? + raise 'Duplicate rule' if @terminus + @terminus = true + @priv = priv + @exception = excpt + return + end + @children[lbl] ||= Rules.new + @children[lbl].add(x, excpt, priv) + end + + def get_regdom(lbls, priv = true, matched_lbls = []) + # Avoid modifying our input by copying it first + lbls = lbls.dup + lbl = lbls.shift + if lbl.nil? + if @terminus && (!@priv || priv) + if @exception + return matched_lbls + end + raise DomainNotAllowed, "#{matched_lbls.reverse.join(".")} is not allowed according to Registry policy" + end + return nil + end + r = @children[lbl].get_regdom(lbls, priv, matched_lbls + [lbl]) if @children.key?(lbl) + return r if !r.nil? + r = @children['*'].get_regdom(lbls, priv, matched_lbls + [lbl]) if @children.key?('*') + return r if !r.nil? + if @terminus && (!@priv || priv) + return matched_lbls if @exception + return matched_lbls + [lbl] + end + nil + end + end +end