-
Notifications
You must be signed in to change notification settings - Fork 168
/
Copy pathg.citation.py
executable file
·1020 lines (884 loc) · 33.3 KB
/
g.citation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
############################################################################
#
# MODULE: g.citation
#
# AUTHOR(S): Vaclav Petras <wenzeslaus AT gmail DOT com> (ORCID: 0000-0001-5566-9236)
# Peter Loewe <ploewe AT osgeo DOT org> (ORCID: 0000-0003-2257-0517)
# Markus Neteler <neteler AT osgeo DOT org> (ORCID: 0000-0003-1916-1966)
#
# PURPOSE: Provide scientific citation for GRASS modules and add-ons.
#
# COPYRIGHT: (C) 2018 by Vaclav Petras and the GRASS Development team
#
# This program is free software under the GNU General Public
# License (>=v2). Read the file COPYING that comes with GRASS
# for details.
#
#############################################################################
# %module
# % description: Provide scientific citation for GRASS modules and add-ons.
# % keyword: general
# % keyword: metadata
# % keyword: citation
# %end
# %option
# % key: module
# % type: string
# % description: GRASS GIS module to be cited
# % multiple: no
# %end
# %option
# % key: format
# % type: string
# % description: Citation format or style
# % options: bibtex,cff,json,pretty-json,csl-json,citeproc,chicago-footnote,dict,plain
# % descriptions: bibtex;BibTeX;cff;Citation File Format;json;JSON;pretty-json;Pretty printed JSON;csl-json;Citation Style Language JSON (citeproc JSON) format;citeproc;Use the citeproc-py library to create the citation (CSL);chicago-footnote;Chicago style for footnotes;dict;Pretty printed Python dictionary;plain;Plain text
# % answer: bibtex
# % required: yes
# %end
# %option
# % key: style
# % type: string
# % description: Citation style for the citeproc formatter (CSL)
# % answer: harvard1
# %end
# %option
# % key: vertical_separator
# % type: string
# % label: Separator of individual citation records
# % description: Inserted before each item
# %end
# %option G_OPT_F_INPUT
# % key: output
# % type: string
# % description: Path of the output file
# % required: no
# %end
# %flag
# % key: a
# % description: Provide citations for all modules
# %end
# %flag
# % key: d
# % label: Add GRASS GIS as dependency to citation
# % description: Add GRASS GIS as dependency, reference, or additional citation to the citation of a module if applicable for the format (currently only CFF)
# %end
# %flag
# % key: s
# % description: Skip errors, provide warning only
# %end
# %rules
# % required: module,-a
# % exclusive: module,-a
# %end
# TODO: if output is provided, write to ascii file
# (otherwise print to command line)
# TODO: Find lhmpom-equivalent in GRASS repository
# x=$(wget -O - 'http://foo/g.region.html')
# Which GRASS version is currently used ?
# What Libraries, etc ?
# g.version -erg
from __future__ import print_function
import html
import sys
import os
import re
from collections import defaultdict
import json
from pathlib import Path
from datetime import datetime
from pprint import pprint
import grass.script as gs
def remove_empty_values_from_dict(d):
    """Remove empty entries from a nested structure of dicts and lists

    Iterates and recurses over instances of dict or list and removes
    all empty entries. The emptiness is evaluated by conversion to bool
    in an if-statement. Values which are instances of bool are kept
    as is (so ``False`` is never removed).

    The emptiness test is applied before the recursion, so a container
    which becomes empty only after its own content was cleaned is kept
    (as an empty dict or list).

    Note that plain dict and list are returned, not the original types.
    What is not an instance of dict or list is returned untouched.

    >>> remove_empty_values_from_dict({"a": "", "b": [0, False, "x"]})
    {'b': [False, 'x']}

    :param d: dict, list, or any other value
    :returns: cleaned copy for dict and list, the value itself otherwise
    """
    if isinstance(d, dict):
        return {
            k: remove_empty_values_from_dict(v)
            for k, v in d.items()
            if v or isinstance(v, bool)
        }
    if isinstance(d, list):
        # The bool test must look at the list item itself; the comprehension
        # has no other variable in scope.
        return [
            remove_empty_values_from_dict(i) for i in d if i or isinstance(i, bool)
        ]
    return d
# TODO: copied from g.manual, possibly move to library
# (lib has also online ones)
def documentation_filename(entry):
    """Get the local path of the HTML documentation of a manual entry

    The page is first searched under ``$GISBASE/docs/html`` and, when it is
    not there and GRASS_ADDON_BASE is set, under
    ``$GRASS_ADDON_BASE/docs/html``.

    :param str entry: name of the manual entry (file name without ``.html``)
    :returns: path to the existing HTML file
    :raises RuntimeError: when the page is found in neither place
    :raises KeyError: when the GISBASE environment variable is not set
    """
    page = entry + ".html"
    candidate = os.path.join(os.environ["GISBASE"], "docs", "html", page)
    if os.path.exists(candidate):
        return candidate
    addon_base = os.getenv("GRASS_ADDON_BASE")
    if addon_base:
        candidate = os.path.join(addon_base, "docs", "html", page)
        if os.path.exists(candidate):
            return candidate
    raise RuntimeError(_("No HTML manual page entry for '%s'") % entry)
def remove_non_author_lines(lines):
    """Drop lines of the authors section which do not describe an author

    Currently, a line is dropped when it contains the copyright sign.

    >>> remove_non_author_lines(["Ann Doe", "© 2012", "John Doe"])
    ['Ann Doe', 'John Doe']

    :param lines: iterable of strings
    :returns: new list with the remaining lines in the original order
    """
    return [line for line in lines if "©" not in line]
def remove_html_tags(lines):
    """Strip HTML tags from each of the lines

    Line breaks (``<br>`` with at most one extra character) are removed
    first, then any other opening or closing tag with a lowercase name.
    The matching is case-sensitive, so uppercase tags are left in place.

    >>> remove_html_tags(["<b>Ann</b> Doe<br>"])
    ['Ann Doe']

    :param lines: iterable of strings
    :returns: new list with the cleaned lines
    """
    return [
        re.sub("</?[a-z]+ ?[^>]*>", "", re.sub("<br.?>", "", line)) for line in lines
    ]
def clean_line_item(text):
    """Clean a text item cut out from a comma-separated line

    Surrounding whitespace is stripped first, then one leading comma
    (together with the spaces following it) and one trailing comma are
    removed.

    >>> print(clean_line_item(",Small University, "))
    Small University

    :param str text: raw item
    :returns: cleaned item
    """
    without_leading_comma = re.sub(r"^, *", "", text.strip())
    return re.sub(r",$", "", without_leading_comma)
def get_datetime_from_documentation(text):
    """Extract the time of the latest change from a manual page

    The "Latest change: ... in commit: ..." entry is preferred, the
    "Accessed: ..." entry is used as a fallback.

    >>> text = " Latest change: Monday Jun 28 11:54:09 2021 in commit: 1cfc0af029a35a5d6c7dae5ca7204d0eb85dbc55"
    >>> get_datetime_from_documentation(text)
    datetime.datetime(2021, 6, 28, 11, 54, 9)

    :param str text: content of the manual page
    :returns: datetime.datetime object (naive, as written in the manual)
    :raises RuntimeError: when no entry is found or it cannot be parsed
    """
    date_format = "%A %b %d %H:%M:%S %Y"
    search_flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    # TODO: raise or fatal? should be in library or module?
    error_message = (
        "Cannot extract the time of the latest change from the manual."
        " The respective entry does not seem to follow the expected standard."
    )
    datetime_capture = r"^ (Latest change: )(.*)( in commit: ).*"
    match = re.search(datetime_capture, text, search_flags)
    if not match:
        datetime_capture = r"^ (Accessed: )([a-z]{6,9} [a-z]{3} [0-9]{1,2} [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}).*"
        match = re.search(datetime_capture, text, search_flags)
    if not match:
        # Without this check a manual with neither entry would end with
        # AttributeError instead of the documented RuntimeError.
        raise RuntimeError(error_message)
    # collapse any runs of whitespace (e.g. padded day of month)
    timestamp = " ".join(match.group(2).split())
    try:
        return datetime.strptime(timestamp, date_format)
    except ValueError as error:
        raise RuntimeError(error_message) from error
def get_email(text):
    """Get email from text

    Returns tuple (email, text_without_email).
    Returns (None, text) if not found.
    Any leading and trailing whitespace is stripped from the text.

    Two notations are recognized, both enclosed in parentheses: a regular
    address containing the at sign and an obfuscated one written as three
    space-separated items where the last one is com, es, or it.
    Other obfuscated forms, e.g. ``(name gmail.com)``, are not recognized.

    >>> print(get_email("Ann Doe (ann.doe@example.com)")[0])
    ann.doe@example.com
    >>> print(get_email("E. Jorge Tizado (ej.tizado unileon es)")[0])
    ej.tizado@unileon.es
    >>> email, text = get_email("Andrea Aime (aaime libero it)")
    >>> print(text)
    Andrea Aime
    >>> print(email)
    aaime@libero.it
    >>> print(get_email("Paul Kelly")[0])
    None

    :param str text: text possibly containing an email
    :returns: tuple (email or None, remaining text)
    """
    # email written in the regular way
    pattern = re.compile(r"\(([^@]+@[^@]+\.[^@]+)\)", re.IGNORECASE)
    found = re.search(pattern, text)
    if found:
        return (found.group(1), re.sub(pattern, "", text).strip())
    # email obfuscated by spaces, only selected top-level domains are tried
    for domain in ["com", "es", "it"]:
        pattern = re.compile(r"\(([^ ]+) ([^ ]+) ({})\)".format(domain), re.IGNORECASE)
        found = re.search(pattern, text)
        if found:
            address = "{name}@{service}.{domain}".format(
                name=found.group(1), service=found.group(2), domain=found.group(3)
            )
            return (address, re.sub(pattern, "", text).strip())
    return (None, text.strip())
def get_orcid(text):
    """Get ORCID from text

    Returns tuple (orcid, text_without_orcid).
    Returns (None, text) if not found.
    Any leading and trailing whitespace is stripped from the text.

    The last character of an ORCID iD is a checksum which can be
    the letter X, so an optional trailing X is accepted and returned
    in upper case.

    >>> # URL style
    >>> print(get_orcid("https://orcid.org/0000-0000-0000-0000")[0])
    0000-0000-0000-0000
    >>> # plain text style
    >>> print(get_orcid("ORCID 0000-0000-0000-0000")[0])
    0000-0000-0000-0000
    >>> # URI style
    >>> print(get_orcid("orcid:0000-0000-0000-0000")[0])
    0000-0000-0000-0000
    >>> # checksum character X
    >>> print(get_orcid("Ann Doe (ORCID: 0000-0002-1694-233X)")[0])
    0000-0002-1694-233X
    >>> # no ORCID
    >>> print(get_orcid("orcid: No ORCID here, no here: orcid.org.")[0])
    None

    :param str text: text possibly containing an ORCID
    :returns: tuple (ORCID or None, remaining text)
    """
    orcid = None
    # ORCID as text
    orcid_re = re.compile(r"\(?ORCID:? ?([0-9-]+X?)\)?", re.IGNORECASE)
    match = re.search(orcid_re, text)
    if not match:
        # ORCID as URL
        orcid_re = re.compile(r"https?://orcid\.org/([0-9-]+X?)", re.IGNORECASE)
        match = re.search(orcid_re, text)
    if match:
        orcid = match.group(1).upper()
        text = re.sub(orcid_re, "", text)
    return (orcid, text.strip())
def get_authors_from_documentation(text):
    r"""Extract authors and associated info from documentation

    The authors section is the text between the heading containing AUTHOR
    and the heading containing SOURCE CODE. Each record (separated by
    ``<br>`` or ``<p>``) is parsed on its own: email and ORCID are taken
    out of the record first, the rest is split at the first comma into
    name and institute. A record ending with a colon is treated as
    a heading which gives the feature for the records which follow.

    >>> text = '<h2><a name="author">AUTHOR</a></h2>\nPaul Kelly\n<br><h2>SOURCE CODE</h2>'
    >>> authors = get_authors_from_documentation(text)
    >>> print(authors[0]['name'])
    Paul Kelly

    :param str text: content of the HTML manual page
    :returns: list of dictionaries with the keys name, institute, feature,
        email, and orcid (values are None when not available)
    :raises RuntimeError: when the authors section is not found
    """
    # Some section names are singular, some plural.
    # Additional tags can appear in the heading compiled documentation.
    # TODO: ...or attributes
    # HTML tags or section name can theoretically be different case.
    # The "last changed" part might be missing.
    # The i and em could be exchanged.
    author_section_capture = r"(<h2>.*AUTHOR.*</h2>)(.*)(<h2>.*SOURCE CODE.*</h2>)"
    match = re.search(
        author_section_capture, text, re.MULTILINE | re.DOTALL | re.IGNORECASE
    )
    if not match:
        raise RuntimeError(_("Unable to find Authors section"))
    author_section = match.group(2)

    raw_author_lines = [
        line.strip()
        for line in author_section.strip()
        .replace("\n", " ")
        .replace("<p>", "<br>")
        .split("<br>")
        if line.strip()
    ]
    raw_author_lines = remove_non_author_lines(raw_author_lines)
    raw_author_lines = remove_html_tags(raw_author_lines)

    authors = []
    feature_heading = None
    for line in raw_author_lines:
        line = html.unescape(line.strip())  # strip after HTML tag strip
        if not line:
            continue
        institute = None
        feature = None
        if line.endswith(":"):
            feature_heading = line[:-1]
            continue
        # Email and ORCID belong to this record only, so they are searched
        # in the current line (not in the whole manual page) and removed
        # from it before the name and institute are separated.
        email, record = get_email(line)
        orcid, record = get_orcid(record)
        ai = record.split(",", 1)
        name = clean_line_item(ai[0])
        if len(ai) == 2:
            institute = clean_line_item(ai[1])
        if " by " in name:
            feature, name = name.split(" by ", 1)
        elif ": " in name:
            feature, name = name.split(": ", 1)
        elif feature_heading:
            feature = feature_heading
        # assuming that names with "and" won't be at the same
        # line/record with author unique info like email or orcid
        if " and " in name:
            names = name.split(" and ", 1)
        elif " & " in name:
            # HTML entities are already unescaped at this point
            names = name.split(" & ", 1)
        else:
            names = [name]
        for name in names:
            # drop academic titles from name
            for title in ["Dr. ", "Prof. "]:
                if name.startswith(title):
                    name = name[len(title) :]
            authors.append(
                {
                    "name": name,
                    "institute": institute,
                    "feature": feature,
                    "email": email,
                    "orcid": orcid,
                }
            )
    # TODO: handle unknown/Unknown author
    return authors
def get_code_urls_from_documentation(text):
    """Extract URLs of the module source code from the manual page

    The links are searched after the SOURCE CODE heading: first the link
    whose label ends with "source code" and then the "history" link
    in parentheses which follows it.

    >>> text = '<h2>SOURCE CODE</h2><a href="https://github.com/OSGeo/grass/tree/main/raster/r.spread">r.spread source code</a> (<a href="https://github.com/OSGeo/grass/commits/main/raster/r.spread">history</a>)'
    >>> get_code_urls_from_documentation(text)
    ('https://github.com/OSGeo/grass/tree/main/raster/r.spread', 'https://github.com/OSGeo/grass/commits/main/raster/r.spread')

    :param str text: content of the HTML manual page
    :returns: tuple (URL of the source code, URL of its history)
    :raises RuntimeError: when the links are not found
    """
    pattern = r'<h2>SOURCE CODE</h2>.*<a href="(.+)">[^<]*source code</a>\s+\(<a href="(.+)">history</a>\)'
    found = re.search(pattern, text, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    if found is None:
        # TODO: raise or fatal? should be in library or module?
        raise RuntimeError("The text does not contain source code URLs")
    return found.group(1, 2)
def remove_dots_from_module_name(name):
    """Return the module name with each dot replaced by an underscore

    >>> remove_dots_from_module_name("r.in.gdal")
    'r_in_gdal'
    """
    # TODO: make this an option or perhaps a flag to replace with nothing
    # is sufficient to cover most needs
    return "_".join(name.split("."))
def internal_to_csl_json(citation):
    """Convert the internal citation dictionary to a CSL JSON item

    Returns the JSON structure as objects (not as one string).
    Author names are split to family and given names using
    author_name_to_cff(). The issue date is the citation year with
    the month and day fixed to "1".

    :param dict citation: module citation
    :returns: dictionary with the keys id, issued, title, type, and author
    """
    split_names = [author_name_to_cff(person["name"]) for person in citation["authors"]]
    csl_authors = [
        {"family": parts["family"], "given": parts["given"]} for parts in split_names
    ]
    module = citation["module"]
    return {
        "id": module,
        "issued": {"date-parts": [[citation["year"], "1", "1"]]},
        "title": "GRASS GIS: " + module + " module",
        "type": "software",
        "author": csl_authors,
    }
try:
    # The star import has to be at the module level because Python does not
    # allow "import *" inside a function. According to the original note,
    # it is needed to make citeproc give results.
    from citeproc.py2compat import *
except ImportError:
    # The import is optional here: when it is not available, the error is
    # ignored. print_using_citeproc() does its own citeproc imports when it
    # is called.
    pass
def print_using_citeproc(csl_json, keys, style):
    """Print the bibliography created by the citeproc-py library

    The items are formatted using the HTML formatter of citeproc and
    printed to the standard output, one bibliography item per line.

    :param dict csl_json: one CSL JSON item (e.g. from internal_to_csl_json())
    :param keys: iterable of keys (ids) of the items to cite
    :param str style: CSL style passed to citeproc (not validated)
    :raises RuntimeError: when a cited key is reported as missing
    """
    from citeproc import Citation, CitationItem
    from citeproc import CitationStylesBibliography, CitationStylesStyle
    from citeproc import formatter
    from citeproc.source.json import CiteProcJSON

    def warn(citation_item):
        # callback used by citeproc for items which are not in the source
        raise RuntimeError(
            "Reference with key '{}' not found".format(citation_item.key)
        )

    source = CiteProcJSON([csl_json])
    csl_style = CitationStylesStyle(style, validate=False)
    bibliography = CitationStylesBibliography(csl_style, source, formatter.html)

    # the following lines just do whatever example in citeproc repo does:
    # register every citation first, only then cite them
    registered = []
    for key in keys:
        entry = Citation([CitationItem(key)])
        bibliography.register(entry)
        registered.append(entry)
    for entry in registered:
        # the returned in-text citation is not used
        bibliography.cite(entry, warn)
    for item in bibliography.bibliography():
        print(str(item))
# TODO: Jr. separated by comma
def author_name_to_cff(text):
    """Split a full name of an author to the parts used by CFF

    Currently, we mostly intend this code to help getting legacy records
    from GRASS manual pages to a parseable format, so we really need to
    address only the national naming styles common for GRASS in 80s-10s.
    This practically means American (US) names and couple other styles.

    >>> d = author_name_to_cff("Richard G. Lathrop Jr.")
    >>> print(d['given'])
    Richard G.
    >>> print(d['family'])
    Lathrop
    >>> print(d['suffix'])
    Jr.
    >>> d = author_name_to_cff("Richard G. Lathrop III")
    >>> print(d['suffix'])
    III
    >>> d = author_name_to_cff("Margherita Di Leo")
    >>> print(d['given'])
    Margherita
    >>> print(d['family'])
    Di Leo

    :param str text: full name, parts separated by whitespace
    :returns: dictionary with the keys given, particle, family, and suffix
        (particle and suffix are None when not present)
    :raises NotImplementedError: when a name of three or four parts
        cannot be split with confidence
    :raises RuntimeError: when the name has one part or more than four
    """
    particles = ["von", "van", "der", "di", "de"]
    suffixes = ["jr", "jnr", "sr", "snr", "junior", "senior"]
    roman = "IVX"  # if you are 40th, we will fix it for you

    def is_suffix(text):
        lowered = text.lower()
        for suffix in suffixes:
            if lowered == suffix:
                return True
            if len(suffix) <= 3 and lowered == suffix + ".":
                return True
        # Generational numerals (II, III, IV, ...): tested on the original
        # text because the numerals are recognized only in upper case.
        return bool(text) and all(char in roman for char in text)

    def is_middle_initial(text):
        if text.isupper():
            if len(text) == 2 and text.endswith("."):
                return True
            if len(text) == 1:
                return True
        return False

    # split on any whitespace so repeated spaces do not create empty parts
    names = text.split()
    # given and family required by CFF 1.0.3
    particle = None
    suffix = None
    if len(names) == 2:
        given = names[0]
        family = names[1]
    elif len(names) == 3:
        if is_middle_initial(names[1]):
            given = " ".join([names[0], names[1]])
            family = names[2]
        elif names[1] in particles:
            given = names[0]
            particle = names[1]
            family = names[2]
        elif names[1][0].isupper() and names[1].lower() in particles:
            # If particle starts with capital, it is often considered
            # to be part of family name.
            given = names[0]
            family = " ".join([names[1], names[2]])
        else:
            # TODO: since this is for legacy code, we could just
            # hardcode the "known" authors such as Maria Antonia Brovelli
            raise NotImplementedError(
                "Not sure if <{n}> is family or middle name in <{t}>".format(
                    n=names[1], t=text
                )
            )
    elif len(names) == 4:
        # assuming that if you have suffix, you have a middle name
        if is_suffix(names[3]):
            given = " ".join([names[0], names[1]])
            family = names[2]
            suffix = names[3]
        else:
            raise NotImplementedError("Not sure how to split <{}>".format(text))
    else:
        raise RuntimeError(_("Cannot split name <{}> correctly").format(text))
    return {"given": given, "particle": particle, "family": family, "suffix": suffix}
def print_cff(citation, output):
    """Create Citation File Format file from citation dictionary

    The output is YAML, so the nested items are indented consistently:
    keys of one list item are aligned under its first key.

    :param dict citation: module citation
    :param output: sys.stdout or text file stream

    >>> authors = [{'name': 'Joe Doe', 'orcid': '0000-0000-0000-0000'}]
    >>> cit = {'module': 'g.tst', 'authors': authors, 'year': 2011}
    >>> cit.update({'grass-version': '7.4.1'})
    >>> cit.update({'grass-build-date': '2018-06-07'})
    >>> print_cff(cit, sys.stdout)
    cff-version: 1.0.3
    message: "If you use this software, please cite it as below."
    authors:
      - family-names: Doe
        given-names: Joe
        orcid: 0000-0000-0000-0000
    title: "GRASS GIS: g.tst module"
    version: 7.4.1
    date-released: 2018-06-07
    license: GPL-2.0-or-later
    """
    print("cff-version: 1.0.3", file=output)
    print(
        'message: "If you use this software, please cite it as below."',
        file=output,
    )
    print("authors:", file=output)
    for author in citation["authors"]:
        # note: CFF 1.0.3 specifies mandatory family, mandatory given,
        # optional particle (e.g. van), and optional suffix (e.g. III),
        # best shot should be taken for names which don't include family
        # or given or which have different order
        name = author_name_to_cff(author["name"])
        print("  - family-names:", name["family"], file=output)
        print("    given-names:", name["given"], file=output)
        # ORCID is optional, the key does not have to be present
        if author.get("orcid"):
            print("    orcid:", author["orcid"], file=output)
    print(
        'title: "GRASS GIS: ',
        citation["module"],
        ' module"',
        sep="",
        file=output,
    )
    print("version:", citation["grass-version"], file=output)
    # CFF 1.0.3 does not say explicitly except for Date (so not any
    # string), so assuming YAML timestamp
    # (https://yaml.org/type/timestamp.html)
    # the build date of GRASS GIS is used as the release date
    print("date-released:", citation["grass-build-date"], file=output)
    # license string according to https://spdx.org/licenses/
    # we know license of GRASS modules should be GPL>=2
    print("license: GPL-2.0-or-later", file=output)
    if citation.get("keywords"):
        print("keywords:", file=output)
        for keyword in citation["keywords"]:
            print("  -", keyword, file=output)
    if citation.get("references"):
        print("references:", file=output)
        for reference in citation["references"]:
            # making sure scope, type, and title are first
            if reference.get("scope"):
                print("  - scope:", reference["scope"], file=output)
                print("    type:", reference["type"], file=output)
            else:
                print("  - type:", reference["type"], file=output)
            print("    title:", reference["title"], file=output)
            for key, value in reference.items():
                if key in ("scope", "type", "title"):
                    continue  # already handled
                # TODO: add general serialization to YAML
                if key == "authors":
                    print("    authors:", file=output)
                    for ref_author in value:
                        # special order for the name of entity: name or
                        # family-names starts the list item when present,
                        # otherwise the first key does
                        ordered_keys = list(ref_author)
                        for leading_key in ("name", "family-names"):
                            if leading_key in ref_author:
                                ordered_keys.remove(leading_key)
                                ordered_keys.insert(0, leading_key)
                                break
                        # each key is printed exactly once
                        for position, akey in enumerate(ordered_keys):
                            prefix = "      - " if position == 0 else "        "
                            print(
                                "{}{}: {}".format(prefix, akey, ref_author[akey]),
                                file=output,
                            )
                elif key == "keywords":
                    print("    keywords:", file=output)
                    for keyword in value:
                        print("      - {}".format(keyword), file=output)
                else:
                    print("    {}: {}".format(key, value), file=output)
def print_bibtex(citation, output):
    """Create BibTeX entry from citation dictionary

    The entry type is misc and the entry key is the module name with
    dots replaced by underscores. The citation needs to provide
    the keys module, authors, code-url, year, and access.

    :param dict citation: module citation
    :param output: sys.stdout or text file stream
    """
    # TODO: make this an option to allow for software in case it is supported
    entry_type = "misc"
    entry_key = remove_dots_from_module_name(citation["module"])
    # each line is printed right away, in the order of the fields
    print("@{}{{{},".format(entry_type, entry_key), file=output)
    print(
        " title = {{{{GRASS GIS: {} module}}}},".format(citation["module"]),
        file=output,
    )
    joined_authors = " and ".join(person["name"] for person in citation["authors"])
    print(" author = {{{}}},".format(joined_authors), file=output)
    print(" howpublished = {{{}}},".format(citation["code-url"]), file=output)
    print(" year = {{{}}},".format(citation["year"]), file=output)
    print(" note = {{Accessed: {}}},".format(citation["access"]), file=output)
    print("}", file=output)
def print_json(citation, output):
    """Create compact JSON dump from the citation dictionary

    Empty values are removed from the citation before the dump.

    :param dict citation: module citation
    :param output: sys.stdout or text file stream
    """
    # since the format is already compact, let's make it even more
    # compact by omitting the spaces after separators
    compact = json.dumps(
        remove_empty_values_from_dict(citation), separators=(",", ":")
    )
    print(compact, file=output)
def print_pretty_json(citation, output):
    """Create pretty-printed JSON dump from the citation dictionary

    Empty values are removed from the citation before the dump.

    :param dict citation: module citation
    :param output: sys.stdout or text file stream
    """
    # - the default separator for list items would leave space at the end
    #   of each line, so a custom one is provided
    # - only small indent is needed, so using 2
    # - keys are sorted because only that can provide consistent output
    dump_settings = {"separators": (",", ": "), "indent": 2, "sort_keys": True}
    pretty = json.dumps(remove_empty_values_from_dict(citation), **dump_settings)
    print(pretty, file=output)
def print_csl_json(citation, output):
    """Create pretty-printed CSL JSON from the citation dictionary

    The citation is converted using internal_to_csl_json() first.

    :param dict citation: module citation
    :param output: sys.stdout or text file stream
    """
    # - the default separator for list items would leave space at the end
    #   of each line, so a custom one is provided
    # - only small indent is needed, so using 2
    # - keys are sorted because only that can provide consistent output
    dump_settings = {"separators": (",", ": "), "indent": 2, "sort_keys": True}
    pretty = json.dumps(internal_to_csl_json(citation), **dump_settings)
    print(pretty, file=output)
def print_chicago_footnote(citation, output):
    """Create chicago-footnote from the citation dictionary

    The citation needs to provide the keys module, authors,
    grass-version, and year.

    :param dict citation: module citation
    :param output: sys.stdout or text file stream
    """
    names = [author["name"] for author in citation["authors"]]
    if len(names) > 1:
        # likely with comma before "and" but unclear for footnote style
        authors_text = ", ".join(names[:-1]) + ", and " + names[-1]
    else:
        authors_text = "".join(names)
    title = "GRASS GIS module {}".format(citation["module"])
    # the values are passed explicitly, so other keys of the citation
    # cannot collide with the names used in the template
    print(
        "{authors_text}, {title} ({version}), computer software ({year}).".format(
            authors_text=authors_text,
            title=title,
            version=citation["grass-version"],
            year=citation["year"],
        ),
        file=output,
    )
def print_plain(citation, output):
    """Create citation from dictionary as plain text

    The first line names the module, then one line per author follows
    with the institute and feature added when they are available.

    :param dict citation: module citation
    :param output: sys.stdout or text file stream

    >>> print_plain({'module': 'g.tst', 'authors': [{'name': 'Joe Doe'}]}, sys.stdout)
    GRASS GIS module g.tst
    Joe Doe
    """
    print("GRASS GIS module", citation["module"], file=output)
    author_lines = []
    for person in citation["authors"]:
        description = person["name"]
        # TODO: not defined if we need institute etc. or not, perhaps
        # use default dict
        if "institute" in person and person["institute"]:
            description += ", {}".format(person["institute"])
        if "feature" in person and person["feature"]:
            description += " ({})".format(person["feature"])
        author_lines.append(description)
    print("\n".join(author_lines), file=output)
# Private dict mapping a format name to the function which prints it.
# Each function is called with two parameters: the citation dictionary
# and the output stream.
# Do not use the dict directly, use print_citation() which also handles
# the citeproc format (it is not listed here).
_FORMAT_FUNCTION = {
    "bibtex": print_bibtex,
    "cff": print_cff,
    "json": print_json,
    "pretty-json": print_pretty_json,
    "csl-json": print_csl_json,
    "chicago-footnote": print_chicago_footnote,
    "plain": print_plain,
    "dict": lambda d, output: pprint(
        dict(d), stream=output
    ),  # only plain dict pretty prints
}
def print_citation(citation, format, output):
    """Create citation from dictionary in a given format

    The citeproc format is handled separately: it always uses
    the harvard1 style and prints to the standard output regardless
    of the output parameter. All other formats are looked up
    in the _FORMAT_FUNCTION dictionary.

    :param dict citation: module citation
    :param str format: citation format
    :param output: sys.stdout or text file stream
    :raises RuntimeError: when the format is not supported
    """
    # formats with special handling of parameters go first
    # (alternatively all functions can have the most rich unified interface)
    if format == "citeproc":
        csl = internal_to_csl_json(citation)
        print_using_citeproc(csl, [citation["module"]], style="harvard1")
        return
    # only the lookup is checked here, errors from the printing function
    # itself are not translated to the unsupported format error
    if format not in _FORMAT_FUNCTION:
        raise RuntimeError(_("Unsupported format or style: %s") % format)
    _FORMAT_FUNCTION[format](citation, output)
def grass_cff_reference(grass_version, scope=None):
    """Reference/citation for GRASS GIS based on CFF (close to CFF)

    The parameter grass_version is a g.version dictionary or equivalent;
    the keys version, build_date, and date are used.

    Returns dictionary with keys of CFF reference (key). The scope key
    is included (as the first one) only when the scope is provided.

    :param grass_version: dictionary with version information
    :param str scope: optional text describing the scope of the reference
    :returns: dictionary describing the reference
    """
    reference = {"scope": scope} if scope else {}
    reference.update(
        {
            "type": "software",
            # the team as an entity
            "authors": [
                {
                    "name": "The GRASS Development Team",
                    "website": "https://grass.osgeo.org/",
                }
            ],
            "title": "GRASS GIS {version}".format(**grass_version),
            "version": grass_version["version"],
            # approximation
            "date-released": grass_version["build_date"],
            "year": grass_version["date"],
            "keywords": [
                "GIS",
                "geospatial analysis",
                "remote sensing",
                "image processing",
            ],
            "license": "GPL-2.0-or-later",
        }
    )
    return reference
def citation_for_module(name, add_grass=False):
    """Provide dictionary of citation values for a module

    The values are extracted from the local HTML manual page of
    the module and from the output of g.version.

    :param str name: name of the module
    :param bool add_grass: add the reference to GRASS GIS itself
        under the references key
    :returns: defaultdict (missing keys give an empty string) with
        the citation values
    :raises RuntimeError: when the manual page is not found or
        the information cannot be extracted from it
    """
    path = documentation_filename(name)
    # derive core strings from lhmpom:
    # NAME / AUTHOR / LAST CHANGED / COPYRIGHT: Years + Entity
    # the file is closed as soon as its content is read
    with open(path) as manual:
        text = manual.read()
    g_version = gs.parse_command("g.version", flags="g")
    # using default empty value, this way we use just if d['k']
    # to check presence and non-emptiness at the same time
    citation = defaultdict(str)
    citation["module"] = name
    citation["grass-version"] = g_version["version"]
    citation["grass-build-date"] = g_version["build_date"]
    citation["authors"] = get_authors_from_documentation(text)
    # the timestamp is parsed once and used for both values
    latest_change = get_datetime_from_documentation(text)
    citation["year"] = latest_change.year
    citation["access"] = latest_change.isoformat()
    code_url, code_history_url = get_code_urls_from_documentation(text)
    citation["code-url"] = code_url
    citation["url-code-history"] = code_history_url
    if add_grass:
        scope = "Use the following to cite the whole GRASS GIS"
        citation["references"] = [grass_cff_reference(g_version, scope=scope)]
    return citation
def get_core_modules():
    """Get sorted list of modules with the standard name prefixes

    The modules are taken from the first item returned by
    gs.get_commands() and filtered by the first two characters
    of their names.
    """
    # test.r3flow manual is non-standard and breaks 'g.citation -a',
    # so here standard module prefixes are filtered
    # two characters are used, so db and r3 are not matched with a dot
    module_prefixes = ("d.", "db", "g.", "h.", "i.", "m.", "r.", "r3", "t.", "v.")
    # TODO: see what get_commands() does on MS Windows
    commands = gs.get_commands()[0]
    return sorted(cmd for cmd in commands if cmd.startswith(module_prefixes))
def main(options, flags):
    """Main function to do the module's work

    Using minimal design, just getting the input and calling other
    functions.

    :param dict options: parsed options of the module
    :param dict flags: parsed flags of the module
    """
    if options["module"]:
        names = options["module"].split(",")
    if flags["a"]:
        names = get_core_modules()
    output_format = options["format"]
    style = options["style"]
    if output_format == "citeproc" and not style:
        gs.fatal(_("Option format=citeproc requires also the option style to be set"))
    vertical_separator = options["vertical_separator"]

    output_path = options["output"]
    if output_path:
        try:
            output = open(output_path, "w")
        except FileNotFoundError:
            gs.fatal(
                _(
                    "No such file or directory '{output_file}'."
                    " Please choose correct output file path."
                ).format(output_file=output_path)
            )
        except PermissionError:
            gs.fatal(
                _(
                    "Permission denied '{output_file}'."
                    " Please change the permission of the output file"
                    " to allow writing."
                ).format(output_file=output_path)
            )
    else:
        output = sys.stdout

    error_count = 0
    try:
        for name in names:
            try:
                citation = citation_for_module(name, add_grass=flags["d"])
                if vertical_separator:
                    # TODO: decide if we want the newline here or not
                    print(vertical_separator, file=output)
                if output_format == "citeproc":
                    # called directly, so that the style requested by the user
                    # is used (print_citation() uses a fixed style)
                    print_using_citeproc(
                        internal_to_csl_json(citation),
                        [citation["module"]],
                        style=style,
                    )
                else:
                    print_citation(citation, output_format, output)
            except RuntimeError as error:
                message = _("Module {name}: {error}").format(name=name, error=error)
                if not flags["s"]:
                    gs.fatal(message)
                gs.warning(message)
                error_count += 1
    finally:
        # close only the file opened here, never the standard output
        if output is not sys.stdout:
            output.close()
    # the summary makes sense only when something actually failed
    if flags["s"] and len(names) > 1 and error_count:
        gs.warning(_("Errors in parsing {} modules").format(error_count))
# TODO: consider "Extended by" versus original authors
# LASTCHANGED, COPYRIGHT-YEARS, COPYRIGHT-ENTITY
# LEFTOVERS:
# A BibTeX entry for LaTeX users is:
#
# @Manual{GRASS_GIS_software,
# title = {Geographic Resources Analysis Support System (GRASS) Software},
# author = {{GRASS Development Team}},
# organization = {Open Source Geospatial Foundation},
# address = {USA},
# year = {YEAR},