Skip to content

Commit

Permalink
logic: follow the PSL linter evaluation rules
Browse files Browse the repository at this point in the history
This commit reverts that internal logic change.

In the previous commit 72111bd this module changed the evaluation
logic to follow the PSL definition described in the wiki.

Now, it follows the rule described in the linter of the PSL code
repository, where a wildcard declaration implies that its zone root is
also a public suffix.

Signed-off-by: ko-zu <[email protected]>
  • Loading branch information
ko-zu committed Jun 2, 2024
1 parent 61668fd commit a6b950b
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 37 deletions.
50 changes: 20 additions & 30 deletions publicsuffixlist/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,62 +167,52 @@ def _countpublic(self, labels, accept_unknown=None) -> int:
if ll == 1 and accept_unknown:
return 1

# There is the PSL algorithm definition,
# https://github.com/publicsuffix/list/wiki/Format
# There is confusion in rule evaluation.
#
# A domain is said to match a rule if and only if all of the following
# conditions are met:
# 1. When the domain and rule are split into corresponding labels, that
# the domain contains as many or more labels than the rule.
# 2. Beginning with the right-most labels of both the domain and the
# rule, and continuing for all labels in the rule, one finds that for
# every pair, either they are identical, or that the label from the
# rule is "*".
#
# Because of rule 1, `foo.com` does not match `*.foo.com`.
#
# However, there is some confusion in rule evaluation.
# test_psl.txt states that city.kobe.jp -> city.kobe.jp
# The test data, test_psl.txt states that
# city.kobe.jp -> city.kobe.jp
# so kobe.jp is public, although kobe.jp is not listed. That means
# test_psl.txt assumes !city.example.com or *.example.com implicitly
# declares example.com as also public.
#
# This module dropped support for the conflicting test case.
# This implicit declaration of wildcard is required and checked by
# the linter.
# https://github.com/publicsuffix/list/blame/de747b657fb0f479667015423c12f98fd47ebf1d/linter/pslint.py#L230
#
# The PSL wiki had listed a wrong example regarding the wildcard.
# This should be resolved by issue:
# https://github.com/publicsuffix/list/issues/1989

# We start from longest to shortcircuit
startfrom = max(0, ll - (self._maxlabel + 1))

excluded = True
for i in range(startfrom, ll):
depth = ll - i
s = ".".join(labels[-depth:])

# the check order must be wild > exact > exception
# this is required to backtrack subdomain wildcard

# exception rule
if ("!" + s) in self._publicsuffix:
# exception rule has wildcard sibling.
# and the wildcard has implicit root.
return depth - 1

# wildcard match
if ("*." + s) in self._publicsuffix:
# if we have subdomain, that must be checked against exception
# rule.
if i > startfrom and not excluded:
# rule. The backtrack check was performed in the previous loop.
if i > 0:
return depth + 1

# If this is entire match, it is not public from the PSL example.
# ignore it.
# If this is entire match, it is implicit root of wildcard.
return depth

# exact match
if s in self._publicsuffix:
return depth

# exception rule
if ("!" + s) in self._publicsuffix:
# exception rule has wildcard sibling.
# Although the test case assumes it has implicit public domain on the root,
# in the PSL definition, the next is not always public.
excluded = True
else:
excluded = False

if accept_unknown:
return 1
return 0
Expand Down
24 changes: 21 additions & 3 deletions publicsuffixlist/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ def test_wiki_example(self):
"""
psl = PublicSuffixList(source.splitlines())

self.assertEqual(psl.is_private("foo.com"), True)
# According to the linter, this rule is incorrect
# self.assertEqual(psl.is_private("foo.com"), True)
self.assertEqual(psl.is_private("bar.foo.com"), False)
self.assertEqual(psl.is_private("example.bar.foo.com"), True)
self.assertEqual(psl.is_private("foo.bar.jp"), True)
Expand Down Expand Up @@ -326,6 +327,22 @@ def test_subdomain_keep_case(self):
bytestuple(b"Www.Example.Co.Jp"))


def test_wildcardonlytld(self):
# Checks suffix lookup when the only rule in the list is the wildcard
# `*.bd` (no explicit `bd` entry) and unknown TLDs are not accepted.
source = """
*.bd
"""
psl = PublicSuffixList(source.splitlines(), accept_unknown=False)

# The wildcard's zone root `bd` is expected to be a public suffix itself,
# so it has no private suffix.
self.assertEqual(psl.publicsuffix("bd"), "bd")
self.assertEqual(psl.privatesuffix("bd"), None)

# A single label under `bd` matches the wildcard: the whole name is the
# public suffix and there is no private suffix.
self.assertEqual(psl.publicsuffix("example.bd"), "example.bd")
self.assertEqual(psl.privatesuffix("example.bd"), None)

# One more label is the first registrable (private) name under the
# wildcard-matched suffix `example.bd`.
self.assertEqual(psl.publicsuffix("example.example.bd"), "example.bd")
self.assertEqual(psl.privatesuffix("example.example.bd"), "example.example.bd")


def test_longwildcard(self):
source = """
com
Expand All @@ -339,8 +356,9 @@ def test_longwildcard(self):
self.assertEqual(psl.publicsuffix("example.com"), "com")
self.assertEqual(psl.privatesuffix("example.com"), "example.com")

self.assertEqual(psl.publicsuffix("compute.example.com"), "com")
self.assertEqual(psl.privatesuffix("compute.example.com"), "example.com")
# wildcard implies the root is also public suffix
self.assertEqual(psl.publicsuffix("compute.example.com"), "compute.example.com")
self.assertEqual(psl.privatesuffix("compute.example.com"), None)

self.assertEqual(psl.publicsuffix("region.compute.example.com"), "region.compute.example.com")
self.assertEqual(psl.privatesuffix("region.compute.example.com"), None)
Expand Down
6 changes: 2 additions & 4 deletions publicsuffixlist/test_psl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,8 @@ checkPublicSuffix('a.b.ide.kyoto.jp', 'b.ide.kyoto.jp');
checkPublicSuffix('c.kobe.jp', null);
checkPublicSuffix('b.c.kobe.jp', 'b.c.kobe.jp');
checkPublicSuffix('a.b.c.kobe.jp', 'b.c.kobe.jp');
// These are not valid anymore
// https://github.com/publicsuffix/list/issues/1890
// checkPublicSuffix('city.kobe.jp', 'city.kobe.jp');
// checkPublicSuffix('www.city.kobe.jp', 'city.kobe.jp');
checkPublicSuffix('city.kobe.jp', 'city.kobe.jp');
checkPublicSuffix('www.city.kobe.jp', 'city.kobe.jp');
// TLD with a wildcard rule and exceptions.
checkPublicSuffix('ck', null);
checkPublicSuffix('test.ck', null);
Expand Down

0 comments on commit a6b950b

Please sign in to comment.