Skip to content

Commit

Permalink
Merge branch 'copyright-file'
Browse files Browse the repository at this point in the history
* Branch commit log:
  doc/copyright: specify Copyright and License for all files
	We use misc/checkcrlist.py in `make check-copyright` to validate
	that all source files have a matching pattern or entry.
  misc/Makefile.mk: check-copyright: use checkcrlist.py to check copyright file
	* check-copyright: new rule to check all source files have a matching entry
	* check-copyright-verbose: new rule to suggest new copyright entries
  misc/checkcrlist.py: detect mismatching 'Files:' and file-path relations
	Given a copyright file, find missing or extraneous 'Files:' patterns
  misc/mkcopyright.py: read list of files via -f filelist
	* doc/Makefile.mk, misc/Makefile.mk: pass files via -f into
	  mkcopyright.py to support paths containing whitespace
	* misc/mkcopyright.py: read list of files via -f filelist

Signed-off-by: Tim Janik <[email protected]>
  • Loading branch information
tim-janik committed Jan 8, 2025
2 parents f0e7a8b + c947822 commit 5c96187
Show file tree
Hide file tree
Showing 6 changed files with 280 additions and 26 deletions.
2 changes: 1 addition & 1 deletion Makefile.mk
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ int main (int argc, char *argv[])
endef

# == dist ==
extradist ::= ChangeLog doc/copyright TAGS ls-tree.lst # doc/README
extradist ::= ChangeLog TAGS ls-tree.lst # doc/README
dist_exclude := $(strip \
external/rapidjson/bin \
external/rapidjson/doc \
Expand Down
13 changes: 1 addition & 12 deletions doc/Makefile.mk
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ doc/install.files ::= $(strip \
$>/doc/NEWS.html \
$>/doc/README.md \
$>/doc/README.html \
$>/doc/copyright \
doc/copyright \
)
doc/pdf.files := $>/doc/anklang-manual.pdf $>/doc/anklang-internals.pdf

Expand All @@ -46,17 +46,6 @@ $(filter %.md, $(doc/install.files)): $>/doc/%.md: %.md doc/Makefile.mk | $>/d
$(QECHO) COPY $<
$Q $(CP) $< $@

# == doc/copyright ==
$>/doc/copyright: misc/mkcopyright.py doc/copyright.ini $>/ls-tree.lst | $>/doc/
$(QGEN)
$Q if test -r .git ; then \
misc/mkcopyright.py -e -c doc/copyright.ini \
$$(cat $>/ls-tree.lst) > $@.tmp ; \
else \
$(CP) doc/copyright $@.tmp ; \
fi
$Q mv $@.tmp $@

# == doc/jsdocs.md ==
doc/jsdocs_js := $(wildcard ui/*.js ui/b/*.js)
doc/jsdocs_md := $(doc/jsdocs_js:ui/%.js=$>/doc/jsdocsmd/%.md)
Expand Down
87 changes: 87 additions & 0 deletions doc/copyright
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
Upstream-Name: anklang
Upstream-Contact: Tim Janik <[email protected]>
Source: https://github.com/tim-janik/anklang/

Files:
.git*
.github/workflows/*.*
LICENSE
Makefile.mk
NEWS.md
README.md
ase/*.c
ase/*.cc
ase/*.h
ase/*.hh
ase/Makefile.mk
devices/*
devices/blepsynth/*
devices/freeverb/Makefile.mk
devices/freeverb/freeverb.cc
devices/liquidsfz/*
devices/saturation/*
doc/*.*
doc/copyright
doc/javascript/*.*
doc/style/*.*
electron/*.*
images/*.*
images/icons/*.*
images/knobs/*.*
jsonipc/*.*
jsonipc/Makefile
misc/*.*
misc/AppRun
misc/blame-lines
package.json
rand/hotspots.sh
ui/*.*
ui/assets/*.*
ui/b/*.*
ui/cursors/*.*
x11test/*.*
Copyright: Copyright (C) 2020-Present The Anklang Project Contributors
License: MPL-2.0

Files:
**/GNUmakefile
GNUmakefile
ase/mime-types.hh
devices/freeverb/*
jsonipc/jsonipc.js
jsonipc/testjsonipc.cc
misc/checkcrlist.py
x11test/*.json
Copyright: Copyright (C) 2020-Present The Anklang Project Contributors
License: Unlicense

License: MPL-2.0
Mozilla Public License 2.0
https://www.mozilla.org/MPL/2.0/
https://opensource.org/licenses/MPL-2.0

License: Unlicense
This is free and unencumbered software released into the public domain.
.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
.
For more information, please refer to <https://unlicense.org/>
10 changes: 7 additions & 3 deletions misc/Makefile.mk
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,14 @@ misc/uninstall: FORCE
uninstall: misc/uninstall

# == Check Copyright Notices ==
check-copyright: misc/mkcopyright.py doc/copyright.ini $>/ls-tree.lst
check-copyright: doc/copyright misc/checkcrlist.py $>/ls-tree.lst
$(QGEN)
$Q misc/mkcopyright.py -b -u -e -c doc/copyright.ini $$(cat $>/ls-tree.lst)
CHECK_TARGETS += $(if $(HAVE_GIT), check-copyright)
$Q misc/checkcrlist.py -e $>/ls-tree.lst $<
CHECK_TARGETS += check-copyright
check-copyright-verbose: doc/copyright misc/checkcrlist.py $>/ls-tree.lst
$(QGEN)
misc/checkcrlist.py --git $>/ls-tree.lst $<
# misc/mkcopyright.py -c doc/copyright.ini -f $>/ls-tree.lst

# == appimagetools/appimage-runtime-zstd ==
$>/appimagetools/appimage-runtime-zstd: | $>/appimagetools/
Expand Down
167 changes: 167 additions & 0 deletions misc/checkcrlist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#!/usr/bin/env python3
# Dedicated to the Public Domain under the Unlicense: https://unlicense.org/UNLICENSE

import sys, os, re, subprocess, getopt, itertools
import fnmatch
from datetime import datetime
sys.stdin.reconfigure (encoding='utf-8')

def die (*args):
  """Print `args` to stderr, prefixed with the program name and 'error:', then exit with status 1."""
  prefix = sys.argv[0] + ': error:'
  print (prefix, *args, file = sys.stderr)
  sys.exit (1)

def glob_translate (inputstr):
  """Translate pathname with wildcards to regexp.

  Wildcards:
    **  matches any sequence of characters, including '/'
    *   matches any sequence of characters within one path component (no '/')
    ?   matches exactly one character within a path component (no '/')
  All other text is matched literally. The returned regexp string is anchored
  at the end with \\Z, so it yields whole-path matches when used with re.match().
  """
  # TODO: use glob.translate from Python 3.13
  wildcards = {
    '**': r'.*',
    '*':  r'[^/]*',
    '?':  r'[^/]',      # a single character must not cross a directory separator, like '*'
  }
  pat = ''
  # the capture group makes re.split() yield literals and wildcards alternately
  for part in re.split (r'(\*\*|\*|\?)', inputstr):
    if part in wildcards:
      pat += wildcards[part]
    elif part:
      pat += re.escape (part)
  return fr'(?s:{pat})\Z'

def print_help (arg0, exit = None):
  """Print usage information for the program named `arg0`.

  With a non-zero `exit`, only the usage line is written to stderr and the
  program exits with that status. Otherwise usage and option help go to
  stdout, and the program exits with status 0 if `exit` is 0.
  """
  # TODO: --file-errors --pattern-errors
  usage = "Usage: %s [OPTIONS] <FILELIST> [COPYRIGHTFILES...]" % arg0
  if exit: # != 0
    print (usage, file = sys.stderr)
    sys.exit (exit)
  helplines = [
    "Check `Files:` patterns from COPYRIGHTFILES against lines in FILELIST.\n",
    "OPTIONS:\n",
    " -e Exit with an error if patterns mismatch\n",
    " --git Print Copyright info from Git\n",
    " -h, --help Show command help\n",
  ]
  print (usage)
  print (''.join (helplines).rstrip())
  if exit == 0:
    sys.exit (0)

# Module-level state, filled in by parse_options() and add_pattern()
# FILE_PATTERNS: one tuple per `Files:` pattern; COUNT is a one-element list so the hit counter can be updated in place
FILE_PATTERNS = [] # [ ( regex, [ COUNT ], COPYRIGHTFILE, LINE, pstr )... ]
# COPYRIGHTFILES: paths of the copyright files given on the command line
COPYRIGHTFILES = []
# FILELIST: path of the file listing one source path per line, '-' reads stdin
FILELIST = None
# ERROR_ON_MISMATCH: set by -e, exit with an error if files or patterns are unmatched
ERROR_ON_MISMATCH = False
# GIT_COPYRIGHT: set by --git, print copyright suggestions from Git history for unmatched files
GIT_COPYRIGHT = False

def parse_options (sysargv):
  """Parse the command line `sysargv` (including the program name) into module globals.

  Sets ERROR_ON_MISMATCH for -e and GIT_COPYRIGHT for --git; -h/--help prints
  the help text and exits. The first positional argument becomes FILELIST, all
  remaining positional arguments become COPYRIGHTFILES. Exits via die() on
  unknown options or if fewer than two positional arguments are given.
  """
  global ERROR_ON_MISMATCH, GIT_COPYRIGHT, FILELIST
  try:
    opts, argv = getopt.getopt (sysargv[1:], 'eh', [ 'help', 'git' ])
  except getopt.GetoptError as ex:
    die (ex) # report bad options as a regular error instead of a traceback
  for k, v in opts:
    if k == '-e':
      ERROR_ON_MISMATCH = True
    elif k == '-h' or k == '--help':
      print_help (sysargv[0], 0)
    elif k == '--git':
      GIT_COPYRIGHT = True
  # one FILELIST plus one or more COPYRIGHTFILES, as advertised by the usage line
  if len (argv) < 2:
    die ("at least two input files are required: <FILELIST> <COPYRIGHTFILE>")
  FILELIST = argv[0]
  COPYRIGHTFILES[:] = argv[1:]

def add_pattern (filename, lineno, pattern):
  """Compile the wildcard `pattern` and append it to FILE_PATTERNS.

  `filename` and `lineno` record where the pattern was found, they are used
  when reporting unused entries.
  """
  regex = re.compile (glob_translate (pattern), 0)
  hits = [0] # one-element list, so the match count can be incremented in place
  FILE_PATTERNS.append (( regex, hits, filename, lineno, pattern ))

def parse_copyrightfile (filename):
  """Register all `Files:` patterns found in the copyright file `filename`.

  A pattern may follow `Files:` on the same line, further patterns are read
  from the continuation lines (lines starting with a space or tab) that follow.
  Each line holds a single pattern, so patterns may contain inner whitespace.
  Any other line, including empty or whitespace-only lines, ends the field.
  """
  # read everything up front so the file is closed before patterns are processed
  with open (filename, 'rt') as crfile:
    lines = crfile.read().splitlines()
  in_files = False
  for lineno, line in enumerate (lines, 1):
    if line.startswith ('Files:'):
      in_files = True
      # strip instead of slicing at a fixed column, 'Files:' may lack a following space
      pattern = line[len ('Files:'):].strip()
      if pattern:
        add_pattern (filename, lineno, pattern)
    elif in_files and line[:1] in (' ', '\t') and line.strip():
      # continuation line, indentation width and trailing blanks are not part of the pattern
      add_pattern (filename, lineno, line.strip())
    else:
      in_files = False

def git_copyright (filename):
  """Return a list of 'Copyright (C) YEARS AUTHOR' lines for `filename` from its Git history.

  Commit years are collected per author from `git log --follow`, adjacent or
  overlapping years are merged into ranges. Entries are ordered by latest
  year first, then by the longest year range.
  """
  # gather copyright history
  gitlog = shcmd ('git', 'log', '--follow',
                  '--dense', '-b', '-w', '--ignore-blank-lines',
                  '--pretty=%as %an', # 2021-02-03 Author Name
                  '--', filename)
  years_by_author = {}
  for entry in gitlog.split ('\n'):
    # expect a 10 character date, a space, then the author name
    if len (entry) <= 10 or entry[10] != ' ':
      continue
    year = int (entry[0:4])
    author = entry[11:].strip()
    years_by_author.setdefault (author, []).append ((year, year))
  # sort, and merge copyright years
  ranges = [] # [(name,[firstyear,lastyear]),...]
  for author, spans in years_by_author.items():
    spans.sort (reverse = True, key = lambda span: span[0])
    merged = []
    for first, last in spans:
      prev = merged[-1] if merged else None
      if prev is not None and first <= prev[1] + 1 and prev[0] <= last + 1:
        # touches or overlaps the previous range, extend it
        prev[0] = min (prev[0], first)
        prev[1] = max (prev[1], last)
      else:
        merged.append ([first, last])
    ranges.extend ((author, span) for span in merged)
  # primary key is the latest year, secondary key is the largest range; the sort is stable
  ranges.sort (reverse = True, key = lambda item: (item[1][1], item[1][1] - item[1][0]))
  # list copyright entries
  clines = []
  for author, (first, last) in ranges:
    if first == last:
      years = '%u' % first
    else:
      years = '%u-%u' % (first, last)
    clines.append ('Copyright (C) ' + years + ' ' + author)
  return clines

def crpathcheck (sysargv):
  """Check all paths from FILELIST against the `Files:` patterns of the copyright files.

  Each path is attributed to the first matching pattern, after sorting the
  patterns by specificity. Paths without any matching pattern are reported on
  stderr as UNKNOWN-COPYRIGHT; with --git a suggested copyright entry derived
  from the Git history is printed for them as well. Patterns that never
  matched are reported as UNUSED-ENTRY. With -e, any such mismatch causes an
  error exit via die(), otherwise the program exits with status 0.
  """
  # parse options and check inputs
  parse_options (sysargv)
  # compile copyright patterns
  for crf in COPYRIGHTFILES:
    parse_copyrightfile (crf)
  # sort patterns by specificity, i.e. length and absence of wildcards
  FILE_PATTERNS[:] = sorted (FILE_PATTERNS, key = lambda tup: (tup[4].count ('**'), tup[4].count ('*'), tup[4].count ('?'), -len (tup[4]), tup[4]))
  # read input file or stdin, an opened file is closed right after reading
  if FILELIST == '-':
    filelines = sys.stdin.read().splitlines()
  else:
    with open (FILELIST, 'rt') as inputstream:
      filelines = inputstream.read().splitlines()
  # check all files for matching pattern
  fileerrors = 0
  for fileline in filelines:
    fmatch = False
    for tup in FILE_PATTERNS:
      if tup[0].match (fileline):
        tup[1][0] += 1 # count the hit for the most specific pattern only
        fmatch = True
        break
    if fmatch:
      continue
    print (sys.argv[0] + ':', 'UNKNOWN-COPYRIGHT:', fileline, file = sys.stderr)
    fileerrors += 1
    if GIT_COPYRIGHT:
      # suggest a new copyright entry from the Git history
      clines = git_copyright (fileline)
      if len (clines) > 0:
        print ('Files:', fileline)
        if len (clines) == 1:
          print ('Copyright:', clines[0])
        else:
          print ('Copyright:')
          for l in clines:
            print (' ' + l)
        print ('License: ?')
        print ()
  # check for unused patterns
  patternerrors = 0
  for tup in FILE_PATTERNS:
    if tup[1][0] == 0:
      print ('%s:%u:' % (tup[2], tup[3]), 'UNUSED-ENTRY:', tup[4])
      patternerrors += 1
  # error out on -e
  if ERROR_ON_MISMATCH and patternerrors:
    die ("failed to match copyright entry in %u cases" % patternerrors)
  if ERROR_ON_MISMATCH and fileerrors:
    die ("failed to match file to copyright entry in %u cases" % fileerrors)
  sys.exit (0)

def shcmd (*args):
  """Run the command given by `args` and return its stdout decoded as UTF-8.

  Raises Exception if the command exits with a non-zero status.
  """
  result = subprocess.run (args, stdout = subprocess.PIPE)
  if result.returncode:
    cmdline = ' '.join (args)
    raise Exception ('%s: failed with status (%d), full command:\n %s' % (args[0], result.returncode, cmdline))
  return result.stdout.decode ('utf-8')

if __name__ == '__main__':
crpathcheck (sys.argv)
27 changes: 17 additions & 10 deletions misc/mkcopyright.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# Dedicated to the Public Domain under the Unlicense: https://unlicense.org/UNLICENSE

import sys, os, re, subprocess, getopt
import sys, os, re, subprocess, getopt, itertools
from datetime import datetime

# TODO:
Expand Down Expand Up @@ -112,8 +112,8 @@ def find_copyrights (filename):
m = crpattern.match (line.strip())
if m:
a, b = m.group (1).strip(), m.group (2).strip()
if a[0] not in '0123456789':
if b[0] not in '0123456789':
if len (a) < 1 or a[0] not in '0123456789':
if len (b) < 1 or b[0] not in '0123456789':
continue
b, a = a, b
copyrights[b] = copyrights.get (b, []) + parse_years (a)
Expand Down Expand Up @@ -187,14 +187,15 @@ def print_help (arg0, exit = None):
h = "Scan FILES for licenses and list copyrights from Git(1) authors.\n"
h += "OPTIONS:\n"
h += " -b Display brief license list\n"
h += " -c<inifile> Read config file in INI format\n"
h += " -c<INIFILE> Read config file in INI format\n"
h += " -e Exit with an error if any files are unlicensed\n"
h += " -f<FILE> Read paths from FILE, one per line\n"
h += " -h, --help Show command help\n"
h += " -l List licensed files only\n"
h += " -u List unlicensed files only\n"
h += " -C<contact> Add Upstream-Contact field\n"
h += " -N<name> Add Upstream-Name field\n"
h += " -S<source> Add Source field\n"
h += " -C<CONTACT> Add Upstream-Contact field\n"
h += " -N<NAME> Add Upstream-Name field\n"
h += " -S<SOURCE> Add Source field\n"
if exit: # != 0
print (u, file = sys.stderr)
sys.exit (exit)
Expand All @@ -205,6 +206,7 @@ def print_help (arg0, exit = None):

default_config = {
'brief': False,
'filelist': None,
'error_unlicensed': False,
'max_header_lines': 10,
'max_xml_lines': 999,
Expand All @@ -216,7 +218,7 @@ def parse_options (sysargv, dfltconfig = default_config):
class Config (object): pass
config = Config()
config.__dict__.update (dfltconfig)
opts, argv = getopt.getopt (sysargv[1:], 'blueh' + 'c:C:N:S:', [ 'help', 'spdx-licenses' ])
opts, argv = getopt.getopt (sysargv[1:], 'blueh' + 'c:C:f:N:S:', [ 'help', 'spdx-licenses' ])
upstream_headers = {}
for k, v in opts:
if k == '-b':
Expand All @@ -225,6 +227,8 @@ class Config (object): pass
load_config_sections (v, config)
elif k == '-e':
config.error_unlicensed = True
elif k == '-f':
config.filelist = v
elif k == '-h' or k == '--help':
print_help (sysargv[0], 0)
elif k == '--spdx-licenses':
Expand All @@ -249,7 +253,10 @@ class Config (object): pass
def mkcopyright (sysargv):
# parse options and check inputs
config = parse_options (sysargv)
if len (config.argv) == 0:
fileiter = ()
if config.filelist:
fileiter = open (config.filelist, 'rt').read().splitlines()
if len (config.argv) == 0 and not config.filelist:
print_help (sysargv[0], 7)
# print debian/copyright header
if not config.brief and config.sections['debian/copyright']:
Expand All @@ -262,7 +269,7 @@ def mkcopyright (sysargv):
# gather copyrights and licenses
count_unlicensed = 0
used_licenses = set()
for filename in config.argv:
for filename in itertools.chain (config.argv, fileiter):
# ignore files
if match_section (filename, config, 'ignore'):
continue
Expand Down

0 comments on commit 5c96187

Please sign in to comment.