Skip to content

Commit

Permalink
Add Github workflow to check _psl DNS entries on PRs (#1933)
Browse files Browse the repository at this point in the history
  • Loading branch information
simon-friedberger authored May 24, 2024
1 parent 029c9a1 commit c2f7b43
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 0 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/check_pr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Workflow: check the _psl DNS records for the rules changed by a pull request
# and label the pull request once the check has passed.
name: Check PR

# Triggered by pull_request_target events, limited to pull requests that
# modify public_suffix_list.dat.
on:
pull_request_target:
paths:
- "public_suffix_list.dat"

# Token permissions for the job; presumably required by the final step, which
# adds a label to the pull request.
permissions:
pull-requests: write

jobs:
build:
runs-on: ubuntu-latest

steps:
# Check out the base branch of the pull request into ./local. The checker
# script and its requirements file are both taken from this checkout.
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.base.ref }}
path: local
# Check out the head commit of the pull request into ./other. Only its
# public_suffix_list.dat is used below, as an input file for the checker.
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
path: other
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: "pip"
# Install the checker's dependencies from the base-branch checkout.
- run: pip install -r local/tools/pr_checker/requirements.txt
# Arguments: current list (base), proposed list (pull request), and the pull
# request number that the _psl TXT records are expected to reference.
- run: python local/tools/pr_checker/check_dns.py local/public_suffix_list.dat other/public_suffix_list.dat ${{ github.event.pull_request.number }}
# Label the pull request. With default step behaviour this step is only
# reached when the DNS check above exited successfully.
- uses: actions/github-script@v7
with:
script: |
github.rest.issues.addLabels({
issue_number: context.payload.pull_request.number,
owner: context.repo.owner,
repo: context.repo.repo,
labels: ['Autocheck Success']
})
148 changes: 148 additions & 0 deletions tools/pr_checker/check_dns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import click
import dns.message
import dns.name
import dns.query
import dns.resolver
import re


def read_rules(psl_filename):
    """Read the rules of a Public Suffix List file into a set.

    Every line is stripped of surrounding whitespace. Blank lines and
    comment lines (starting with ``//``) are skipped; all remaining lines
    are kept verbatim, so wildcard (``*.``) and exception (``!``) markers
    as well as trailing dots are preserved.

    The file is always decoded as UTF-8 instead of the locale's default
    encoding, because the list contains non-ASCII (IDN) rules.

    Args:
        psl_filename: Path of the PSL file to read.

    Returns:
        A set containing one string per rule.

    >>> sorted(read_rules('test_psl_01.dat'))
    ['!main.bar.example.org', '*.bar.example.org', '*.bd', 'a.example.com', 'at', 'b.example.com.', 'example.com', 'example.org', 'foo.example.org']
    """
    with open(psl_filename, encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return {
            line
            for line in stripped_lines
            if line and not line.startswith("//")
        }


def rule2fqdn(rule):
    """Return the domain name for a rule.

    Removes a leading wildcard label (``*.``) and a leading exception
    qualifier (``!``). Everything else, including a trailing dot, is
    returned unchanged.

    Args:
        rule: A single PSL rule.

    Returns:
        The rule without its wildcard / exception prefix.

    Raises:
        ValueError: If ``!`` or ``*`` still occurs in the rule after the
            prefixes have been removed.

    >>> rule2fqdn("alpha.beta.example.com")
    'alpha.beta.example.com'
    >>> rule2fqdn("*.hokkaido.jp")
    'hokkaido.jp'
    >>> rule2fqdn("!pref.hokkaido.jp")
    'pref.hokkaido.jp'
    """
    if rule.startswith("*."):
        rule = rule[2:]

    if rule.startswith("!"):
        rule = rule[1:]

    # Raise explicitly instead of using `assert`: assertions are removed when
    # Python runs with -O, which would let malformed rules through.
    if "!" in rule or "*" in rule:
        raise ValueError(f"Rule contains a misplaced '!' or '*': {rule!r}")

    return rule


def check_dns_pr(rule, pr_id):
    """Check the _psl DNS entry for a rule.

    Resolves the TXT records of ``_psl.<domain of the rule>`` and looks for
    a record of the form
    ``"https://github.com/publicsuffix/list/pull/<number>"``. All records
    of the answer are inspected, so one record naming a different pull
    request does not hide another record naming the expected one.

    Progress and failure reasons are printed to stdout.

    Args:
        rule: The PSL rule to check (wildcard / exception prefixes allowed).
        pr_id: The pull request number the TXT record must reference.

    Returns:
        True if a TXT record references pull request ``pr_id``. False if
        the lookup fails (no nameservers, NXDOMAIN, no answer, timeout) or
        no record references ``pr_id``.

    The examples below query live DNS records.

    >>> check_dns_pr("tests.arcane.engineering", 123456)
     Rule: tests.arcane.engineering
     Checking TXT entry for _psl.tests.arcane.engineering.
     DNS answer: "https://github.com/publicsuffix/list/pull/123456" -> PR 123456
    True
    >>> check_dns_pr("tests.arcane.engineering", 666)
     Rule: tests.arcane.engineering
     Checking TXT entry for _psl.tests.arcane.engineering.
     DNS answer: "https://github.com/publicsuffix/list/pull/123456" -> PR 123456
     DNS _psl entry incorrect expected PR 666 != 123456.
    False
    >>> check_dns_pr("foo.arcane.engineering", 666)
     Rule: foo.arcane.engineering
     Checking TXT entry for _psl.foo.arcane.engineering.
     No answer from nameserver for '_psl.foo.arcane.engineering.'.
    False
    """
    print(f" Rule: {rule}")
    name = dns.name.from_text(rule)
    pslname = dns.name.from_text("_psl." + rule2fqdn(rule))
    print(f" Checking TXT entry for {pslname}")
    try:
        answer = dns.resolver.resolve(pslname, "TXT")
    except dns.resolver.NoNameservers:
        print(f" No nameserver found for '{pslname}'.")
        return False
    except dns.resolver.NXDOMAIN:
        print(f" No _psl entry for '{name}'.")
        return False
    except dns.resolver.NoAnswer:
        print(f" No answer from nameserver for '{pslname}'.")
        return False
    except dns.resolver.LifetimeTimeout:
        # Report a timeout like any other failed lookup instead of letting
        # the exception end the whole run with a traceback.
        print(f" Timeout while resolving '{pslname}'.")
        return False

    found_pr_url = False
    for rdata in answer:
        # The dots of the host name are escaped so they only match a
        # literal '.'.
        if not (
            match := re.match(
                r"\"https://github\.com/publicsuffix/list/pull/(\d+)\"",
                str(rdata),
            )
        ):
            continue

        found_pr_url = True
        dns_pr_id = int(match[1])
        print(f" DNS answer: {match[0]} -> PR {dns_pr_id}")
        if dns_pr_id == pr_id:
            return True
        # Keep looking: the name may carry several TXT records, and the
        # order in which they are returned is not guaranteed.
        print(f" DNS _psl entry incorrect expected PR {pr_id} != {dns_pr_id}.")

    if not found_pr_url:
        print("No DNS entry with pull request URL found.")
    return False


def psl_diff(current_filename, pull_request_filename):
    """Return the rules a pull request adds to and removes from the PSL.

    Both files are read with ``read_rules`` and compared as sets.

    Args:
        current_filename: Path of the current list.
        pull_request_filename: Path of the list as proposed by the pull
            request.

    Returns:
        A tuple ``(added, removed)`` of two sets: the rules found only in
        the pull request file and the rules found only in the current file.

    >>> added, removed = psl_diff("test_psl_01.dat", "test_psl_02.dat")
    >>> sorted(added)
    ['be', 'com']
    >>> sorted(removed)
    ['*.bd', 'a.example.com', 'b.example.com.']
    """
    before = read_rules(current_filename)
    after = read_rules(pull_request_filename)
    return (after - before, before - after)


@click.command()
@click.argument("current_filename")
@click.argument("pull_request_filename")
@click.argument("pr_id", type=click.INT)
def main(current_filename, pull_request_filename, pr_id):
    """This script compares two PSL files and checks the _psl DNS records for the changed rules.
    It can be tested using doctests by running `python -m doctest check_dns.py`
    """
    # Exits with status 1 if the _psl check fails for at least one added or
    # removed rule; returns normally otherwise.
    added, removed = psl_diff(current_filename, pull_request_filename)

    # `added` and `removed` are sets; sets do not support `+`, so build the
    # union. Sorting gives a reproducible order in the log output.
    changed_rules = sorted(added | removed)

    # Evaluate every rule before deciding (no short-circuit), so the log
    # reports all failing rules rather than only the first one.
    results = [check_dns_pr(rule, pr_id) for rule in changed_rules]

    if not all(results):
        # SystemExit is a builtin; `exit()` is only injected by the `site`
        # module and is not guaranteed to exist.
        raise SystemExit(1)


if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions tools/pr_checker/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
click==8.1.7
dnspython==2.5.0
21 changes: 21 additions & 0 deletions tools/pr_checker/test_psl_01.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// ===BEGIN ICANN DOMAINS===

at
*.bd

// example.com: https://www.iana.org/domains/reserved
example.com
a.example.com
b.example.com.

// ===END ICANN DOMAINS===
// ===BEGIN PRIVATE DOMAINS===
// (Note: these are in alphabetical order by company name)

// Some company
example.org
foo.example.org
*.bar.example.org
!main.bar.example.org

// ===END PRIVATE DOMAINS===
20 changes: 20 additions & 0 deletions tools/pr_checker/test_psl_02.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// ===BEGIN ICANN DOMAINS===

at
be
com

// example.com: https://www.iana.org/domains/reserved
example.com

// ===END ICANN DOMAINS===
// ===BEGIN PRIVATE DOMAINS===
// (Note: these are in alphabetical order by company name)

// Some company
example.org
foo.example.org
*.bar.example.org
!main.bar.example.org

// ===END PRIVATE DOMAINS===

0 comments on commit c2f7b43

Please sign in to comment.