Skip to content

Commit

Permalink
tools/psltool: support for analyzing a github PR (#2087)
Browse files Browse the repository at this point in the history
  • Loading branch information
danderson authored Aug 8, 2024
1 parent f47fcb3 commit d606a6b
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 0 deletions.
3 changes: 3 additions & 0 deletions tools/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,8 @@ require (
github.com/creachadair/command v0.1.13
github.com/creachadair/flax v0.0.0-20240525192034-44db93b3a8ad
github.com/creachadair/mds v0.15.2
github.com/google/go-github/v63 v63.0.0
github.com/natefinch/atomic v1.0.1
)

require github.com/google/go-querystring v1.1.0 // indirect
6 changes: 6 additions & 0 deletions tools/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,17 @@ github.com/creachadair/flax v0.0.0-20240525192034-44db93b3a8ad h1:Fv6FRWgCJTHssl
github.com/creachadair/flax v0.0.0-20240525192034-44db93b3a8ad/go.mod h1:K8bFvn8hMdAljQkaKNc7I3os5Wk36JxkyCkfdZ7S8d4=
github.com/creachadair/mds v0.15.2 h1:es1qGKgRGSaztpvrSQcZ0B9I6NsHYJ1Sa9naD/3OfCM=
github.com/creachadair/mds v0.15.2/go.mod h1:4vrFYUzTXMJpMBU+OA292I6IUxKWCCfZkgXg+/kBZMo=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-github/v63 v63.0.0 h1:13xwK/wk9alSokujB9lJkuzdmQuVn2QCPeck76wR3nE=
github.com/google/go-github/v63 v63.0.0/go.mod h1:IqbcrgUmIcEaioWrGYei/09o+ge5vhffGOcxrO0AfmA=
github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8=
github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=
github.com/natefinch/atomic v1.0.1 h1:ZPYKxkqQOx3KZ+RsbnP/YsgvxWQPGxjC0oBt2AhwV0A=
github.com/natefinch/atomic v1.0.1/go.mod h1:N/D/ELrljoqDyT3rZrsUmtsuzvHkeB/wWjHV22AZRbM=
golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
132 changes: 132 additions & 0 deletions tools/internal/github/pr.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Package github provides a github client with functions tailored to
// the PSL's needs.
package github

import (
"context"
"fmt"
"os"
"time"

"github.com/google/go-github/v63/github"
)

// Client is a GitHub API client that performs PSL-specific
// operations. The zero value is a client that interacts with the
// official publicsuffix/list repository.
type Client struct {
// Owner is the github account of the repository to query. If
// empty, defaults to "publicsuffix".
Owner string
// Repo is the repository to query. If empty, defaults to "list".
Repo string

// client is the underlying go-github API client. It is nil until
// the first call to apiClient, which creates and caches it.
client *github.Client
}

// owner reports the GitHub account to query: the configured Owner,
// or "publicsuffix" when Owner is unset.
func (c *Client) owner() string {
	if c.Owner == "" {
		return "publicsuffix"
	}
	return c.Owner
}

// repo reports the repository name to query: the configured Repo, or
// "list" when Repo is unset.
func (c *Client) repo() string {
	if c.Repo == "" {
		return "list"
	}
	return c.Repo
}

// apiClient returns the go-github client used for all API calls,
// building and caching it on first use. If the GITHUB_TOKEN
// environment variable is non-empty at that time, the client
// authenticates with it.
func (c *Client) apiClient() *github.Client {
	if c.client != nil {
		return c.client
	}

	gh := github.NewClient(nil)
	if token := os.Getenv("GITHUB_TOKEN"); token != "" {
		gh = gh.WithAuthToken(token)
	}
	c.client = gh
	return gh
}

// PSLForPullRequest fetches the PSL files needed to validate the
// given pull request. It returns the PSL file for the target branch
// (withoutPR), and the same file with the PR's changes applied
// (withPR).
//
// An error is returned if the PR is not open, is not mergeable, has
// no trial merge commit or head SHA, or if any GitHub request fails.
// All GitHub requests made by one call, including the two file
// fetches, share a single 5-second deadline derived from ctx.
func (c *Client) PSLForPullRequest(ctx context.Context, prNum int) (withoutPR, withPR []byte, err error) {
	ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()

	pr, _, err := c.apiClient().PullRequests.Get(ctx, c.owner(), c.repo(), prNum)
	if err != nil {
		return nil, nil, fmt.Errorf("getting info for PR %d: %w", prNum, err)
	}

	if state := pr.GetState(); state != "open" {
		return nil, nil, fmt.Errorf("cannot get PSL for PR %d with status %q", prNum, state)
	}
	// GitHub reports mergeability as null while it is still computing
	// it in the background. GetMergeable would turn that into false,
	// so report it separately instead of claiming a rebase is needed.
	if pr.Mergeable == nil {
		return nil, nil, fmt.Errorf("cannot get PSL for PR %d, mergeability not yet computed by GitHub, retry shortly", prNum)
	}
	if !pr.GetMergeable() {
		return nil, nil, fmt.Errorf("cannot get PSL for PR %d, needs rebase", prNum)
	}
	trialMergeCommit := pr.GetMergeCommitSHA()
	if trialMergeCommit == "" {
		return nil, nil, fmt.Errorf("no trial merge commit available for PR %d", prNum)
	}

	prHeadCommit := pr.GetHead().GetSHA()
	if prHeadCommit == "" {
		return nil, nil, fmt.Errorf("no commit SHA available for head of PR %d", prNum)
	}

	// We want to return the trial merge commit's PSL as withPR, and
	// the non-PR parent of that merge as withoutPR. GitHub only
	// provides information about the trial merge commit and the PR
	// head commit in the PR API. It also provides a "base" ref, but
	// empirical evidence shows this points at some random commit
	// somewhere and updates based on unclear triggers. In other
	// words, it is _not_ "master without the PR applied".
	//
	// Instead, we have to ask the git API for information about the
	// trial merge commit, and find the correct withoutPR SHA from
	// that.
	commitInfo, _, err := c.apiClient().Git.GetCommit(ctx, c.owner(), c.repo(), trialMergeCommit)
	if err != nil {
		return nil, nil, fmt.Errorf("getting info for trial merge SHA %q: %w", trialMergeCommit, err)
	}
	if numParents := len(commitInfo.Parents); numParents != 2 {
		return nil, nil, fmt.Errorf("unexpected parent count %d for trial merge commit on PR %d, expected 2 parents", numParents, prNum)
	}

	// Whichever parent is not the PR head is the target branch as it
	// was just before the merge.
	beforeMergeCommit := commitInfo.Parents[0].GetSHA()
	if beforeMergeCommit == prHeadCommit {
		beforeMergeCommit = commitInfo.Parents[1].GetSHA()
	}

	withoutPR, err = c.PSLForHash(ctx, beforeMergeCommit)
	if err != nil {
		return nil, nil, err
	}
	withPR, err = c.PSLForHash(ctx, trialMergeCommit)
	if err != nil {
		return nil, nil, err
	}
	return withoutPR, withPR, nil
}

// PSLForHash returns the contents of public_suffix_list.dat at the
// given git commit hash.
//
// The GitHub request is bounded by a 5-second timeout (or the
// deadline already on ctx, if that is sooner). An error is returned
// if the request fails, if the path does not resolve to a single
// file, or if the returned content cannot be decoded.
func (c *Client) PSLForHash(ctx context.Context, hash string) ([]byte, error) {
	ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()

	opts := &github.RepositoryContentGetOptions{
		Ref: hash,
	}
	content, _, _, err := c.apiClient().Repositories.GetContents(ctx, c.owner(), c.repo(), "public_suffix_list.dat", opts)
	if err != nil {
		return nil, fmt.Errorf("getting PSL for commit %q: %w", hash, err)
	}
	// GetContents leaves the file result nil when the path is a
	// directory; guard so GetContent is never called on a nil pointer.
	if content == nil {
		return nil, fmt.Errorf("getting PSL for commit %q: public_suffix_list.dat is not a regular file", hash)
	}
	ret, err := content.GetContent()
	if err != nil {
		return nil, fmt.Errorf("decoding PSL for commit %q: %w", hash, err)
	}
	return []byte(ret), nil
}
83 changes: 83 additions & 0 deletions tools/psltool/psltool.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,20 @@ import (
"context"
"errors"
"fmt"
"io"
"log"
"os"
"os/signal"
"path/filepath"
"strconv"
"strings"
"syscall"

"github.com/creachadair/command"
"github.com/creachadair/flax"
"github.com/creachadair/mds/mdiff"
"github.com/natefinch/atomic"
"github.com/publicsuffix/list/tools/internal/github"
"github.com/publicsuffix/list/tools/internal/parser"
)

Expand Down Expand Up @@ -47,6 +50,16 @@ conformance with the PSL project's style rules and policies.`,
SetFlags: command.Flags(flax.MustBind, &validateArgs),
Run: command.Adapt(runValidate),
},
{
Name: "check-pr",
Usage: "<number>",
Help: `Validate an open PR on GitHub.
Validation includes basic issues like parse errors, as well as
conformance with the PSL project's style rules and policies.`,
SetFlags: command.Flags(flax.MustBind, &checkPRArgs),
Run: command.Adapt(runCheckPR),
},
{
Name: "debug",
Commands: []*command.C{
Expand Down Expand Up @@ -151,6 +164,76 @@ func runValidate(env *command.Env, path string) error {
}
}

// checkPRArgs holds the command-line flags of the check-pr
// subcommand. The flag names, defaults and help strings come from
// the struct tags.
var checkPRArgs struct {
// Owner is the GitHub account owning the repository to check.
Owner string `flag:"gh-owner,default=publicsuffix,Owner of the github repository to check"`
// Repo is the name of the GitHub repository to check.
Repo string `flag:"gh-repo,default=list,Github repository to check"`
// Online requests validations that query third-party servers.
Online bool `flag:"online-checks,Run validations that require querying third-party servers"`
}

// runCheckPR implements the check-pr subcommand. prStr is the pull
// request number as given on the command line.
//
// It fetches the PSL file both without and with the PR applied,
// parses both, and validates the post-PR file against the pre-PR
// file as its base version. It prints the suffix blocks that were
// marked changed, followed by any validation errors. It returns nil
// if the file is valid, and otherwise an error stating the number of
// problems found.
func runCheckPR(env *command.Env, prStr string) error {
	pr, err := strconv.Atoi(prStr)
	if err != nil {
		return fmt.Errorf("invalid PR number %q: %w", prStr, err)
	}

	client := github.Client{
		Owner: checkPRArgs.Owner,
		Repo:  checkPRArgs.Repo,
	}
	withoutPR, withPR, err := client.PSLForPullRequest(env.Context(), pr)
	if err != nil {
		return err
	}

	// Parse errors in the pre-PR file are deliberately discarded; only
	// problems in the post-PR file are reported.
	// NOTE(review): presumably because they predate the PR - confirm.
	before, _ := parser.Parse(withoutPR)
	after, errs := parser.Parse(withPR)
	after.SetBaseVersion(before, true)
	errs = append(errs, after.Clean()...)
	errs = append(errs, parser.ValidateOffline(after)...)
	// Consult this command's own flag set: check-pr binds checkPRArgs,
	// so validateArgs.Online is never set by its --online-checks flag.
	if checkPRArgs.Online {
		// TODO: no online validations implemented yet.
	}

	clean := after.MarshalPSL()
	if !bytes.Equal(withPR, clean) {
		errs = append(errs, errors.New("file needs reformatting, run 'psltool fmt' to fix"))
	}

	// Print the blocks marked changed, so a human can check that
	// something was actually checked by validations.
	var changed []*parser.Suffixes
	for _, block := range parser.BlocksOfType[*parser.Suffixes](after) {
		if block.Changed() {
			changed = append(changed, block)
		}
	}
	if len(changed) == 0 {
		fmt.Fprintln(env, "No suffix blocks changed. This can happen if only top-level comments have been edited.")
	} else {
		fmt.Fprintln(env, "Checked the following changed suffix blocks:")
		for _, block := range changed {
			fmt.Fprintf(env, " %q (%s)\n", block.Info.Name, block.LocationString())
		}
	}
	io.WriteString(env, "\n")

	if len(errs) > 0 {
		for _, err := range errs {
			fmt.Fprintln(env, err)
		}
		io.WriteString(env, "\n")
	}

	switch n := len(errs); n {
	case 0:
		fmt.Fprintln(env, "PSL file is valid")
		return nil
	case 1:
		return errors.New("file has 1 error")
	default:
		return fmt.Errorf("file has %d errors", n)
	}
}

var debugDumpArgs struct {
Clean bool `flag:"c,Clean AST before dumping"`
Format string `flag:"f,default=ast,Format to dump in, one of 'ast' or 'psl'"`
Expand Down

0 comments on commit d606a6b

Please sign in to comment.