forked from NixOS/nixpkgs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
manpage.py
287 lines (271 loc) · 12.9 KB
/
manpage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
from collections.abc import Mapping, Sequence
from dataclasses import dataclass
from typing import cast, Iterable, Optional
import re
from markdown_it.token import Token
from .md import Renderer
# roff(7) says:
#
# > roff documents may contain only graphable 7-bit ASCII characters, the space character,
# > and, in certain circumstances, the tab character. The backslash character ‘\’ indicates
# > the start of an escape sequence […]
#
# mandoc_char(7) says about the `'~^ characters:
#
# > In prose, this automatic substitution is often desirable; but when these characters have
# > to be displayed as plain ASCII characters, for example in source code samples, they require
# > escaping to render as follows:
#
# since we don't want these to be touched anywhere (because markdown will do all substituations
# we want to have) we'll escape those as well. we also escape " (macro metacharacter), - (might
# turn into a typographic hyphen), and . (roff request marker at SOL, changes spacing semantics
# at EOL). groff additionally does not allow unicode escapes for codepoints below U+0080, so
# those need "proper" roff escapes/replacements instead.
_roff_unicode = re.compile(r'''[^\n !#$%&()*+,\-./0-9:;<=>?@A-Z[\\\]_a-z{|}]''', re.ASCII)
_roff_escapes = {
ord('"'): "\\(dq",
ord("'"): "\\(aq",
ord('-'): "\\-",
ord('.'): "\\&.",
ord('\\'): "\\e",
ord('^'): "\\(ha",
ord('`'): "\\(ga",
ord('~'): "\\(ti",
}
def man_escape(s: str) -> str:
s = s.translate(_roff_escapes)
return _roff_unicode.sub(lambda m: f"\\[u{ord(m[0]):04X}]", s)
# remove leading and trailing spaces from links and condense multiple consecutive spaces
# into a single space for presentation parity with html. this is currently easiest with
# regex postprocessing and some marker characters. since we don't want to drop spaces
# from code blocks we will have to specially protect *inline* code (luckily not block code)
# so normalization can turn the spaces inside it into regular spaces again.
_normalize_space_re = re.compile(r'''\u0000 < *| *>\u0000 |(?<= ) +''')
def _normalize_space(s: str) -> str:
return _normalize_space_re.sub("", s).replace("\0p", " ")
def _protect_spaces(s: str) -> str:
return s.replace(" ", "\0p")
@dataclass(kw_only=True)
class List:
width: int
next_idx: Optional[int] = None
compact: bool
first_item_seen: bool = False
# this renderer assumed that it produces a set of lines as output, and that those lines will
# be pasted as-is into a larger output. no prefixing or suffixing is allowed for correctness.
#
# NOTE that we output exclusively physical markup. this is because we have to use the older
# mandoc(7) format instead of the newer mdoc(7) format due to limitations in groff: while
# using mdoc in groff works fine it is not a native format and thus very slow to render on
# manpages as large as configuration.nix.5. mandoc(1) renders both really quickly, but with
# groff being our predominant manpage viewer we have to optimize for groff instead.
#
# while we do use only physical markup (adjusting indentation with .RS and .RE, adding
# vertical spacing with .sp, \f[BIRP] escapes for bold/italic/roman/$previous font, \h for
# horizontal motion in a line) we do attempt to copy the style of mdoc(7) semantic requests
# as appropriate for each markup element.
class ManpageRenderer(Renderer):
# whether to emit mdoc .Ql equivalents for inline code or just the contents. this is
# mainly used by the options manpage converter to not emit extra quotes in defaults
# and examples where it's already clear from context that the following text is code.
inline_code_is_quoted: bool = True
link_footnotes: Optional[list[str]] = None
_href_targets: dict[str, str]
_link_stack: list[str]
_do_parbreak_stack: list[bool]
_list_stack: list[List]
_font_stack: list[str]
def __init__(self, manpage_urls: Mapping[str, str], href_targets: dict[str, str]):
super().__init__(manpage_urls)
self._href_targets = href_targets
self._link_stack = []
self._do_parbreak_stack = []
self._list_stack = []
self._font_stack = []
def _join_block(self, ls: Iterable[str]) -> str:
return "\n".join([ l for l in ls if len(l) ])
def _join_inline(self, ls: Iterable[str]) -> str:
return _normalize_space(super()._join_inline(ls))
def _enter_block(self) -> None:
self._do_parbreak_stack.append(False)
def _leave_block(self) -> None:
self._do_parbreak_stack.pop()
self._do_parbreak_stack[-1] = True
def _maybe_parbreak(self, suffix: str = "") -> str:
result = f".sp{suffix}" if self._do_parbreak_stack[-1] else ""
self._do_parbreak_stack[-1] = True
return result
def _admonition_open(self, kind: str) -> str:
self._enter_block()
return (
'.sp\n'
'.RS 4\n'
f'\\fB{kind}\\fP\n'
'.br'
)
def _admonition_close(self) -> str:
self._leave_block()
return ".RE"
def render(self, tokens: Sequence[Token]) -> str:
self._do_parbreak_stack = [ False ]
self._font_stack = [ "\\fR" ]
return super().render(tokens)
def text(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return man_escape(token.content)
def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return self._maybe_parbreak()
def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return ""
def hardbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return ".br"
def softbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return " "
def code_inline(self, token: Token, tokens: Sequence[Token], i: int) -> str:
s = _protect_spaces(man_escape(token.content))
return f"\\fR\\(oq{s}\\(cq\\fP" if self.inline_code_is_quoted else s
def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return self.fence(token, tokens, i)
def link_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
href = cast(str, token.attrs['href'])
self._link_stack.append(href)
text = ""
if tokens[i + 1].type == 'link_close' and href in self._href_targets:
# TODO error or warning if the target can't be resolved
text = self._href_targets[href]
self._font_stack.append("\\fB")
return f"\\fB{text}\0 <"
def link_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
href = self._link_stack.pop()
text = ""
if self.link_footnotes is not None:
try:
idx = self.link_footnotes.index(href) + 1
except ValueError:
self.link_footnotes.append(href)
idx = len(self.link_footnotes)
text = "\\fR" + man_escape(f"[{idx}]")
self._font_stack.pop()
return f">\0 {text}{self._font_stack[-1]}"
def list_item_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
self._enter_block()
lst = self._list_stack[-1]
maybe_space = '' if lst.compact or not lst.first_item_seen else '.sp\n'
lst.first_item_seen = True
head = "•"
if lst.next_idx is not None:
head = f"{lst.next_idx}."
lst.next_idx += 1
return (
f'{maybe_space}'
f'.RS {lst.width}\n'
f"\\h'-{len(head) + 1}'\\fB{man_escape(head)}\\fP\\h'1'\\c"
)
def list_item_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
self._leave_block()
return ".RE"
def bullet_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
self._list_stack.append(List(width=4, compact=bool(token.meta['compact'])))
return self._maybe_parbreak()
def bullet_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
self._list_stack.pop()
return ""
def em_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
self._font_stack.append("\\fI")
return "\\fI"
def em_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
self._font_stack.pop()
return self._font_stack[-1]
def strong_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
self._font_stack.append("\\fB")
return "\\fB"
def strong_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
self._font_stack.pop()
return self._font_stack[-1]
def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
s = man_escape(token.content).rstrip('\n')
return (
'.sp\n'
'.RS 4\n'
'.nf\n'
f'{s}\n'
'.fi\n'
'.RE'
)
def blockquote_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
maybe_par = self._maybe_parbreak("\n")
self._enter_block()
return (
f"{maybe_par}"
".RS 4\n"
f"\\h'-3'\\fI\\(lq\\(rq\\fP\\h'1'\\c"
)
def blockquote_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
self._leave_block()
return ".RE"
def note_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return self._admonition_open("Note")
def note_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return self._admonition_close()
def caution_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return self._admonition_open( "Caution")
def caution_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return self._admonition_close()
def important_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return self._admonition_open( "Important")
def important_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return self._admonition_close()
def tip_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return self._admonition_open( "Tip")
def tip_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return self._admonition_close()
def warning_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return self._admonition_open( "Warning")
def warning_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return self._admonition_close()
def dl_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return ".RS 4"
def dl_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return ".RE"
def dt_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return ".PP"
def dt_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return ""
def dd_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
self._enter_block()
return ".RS 4"
def dd_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
self._leave_block()
return ".RE"
def myst_role(self, token: Token, tokens: Sequence[Token], i: int) -> str:
if token.meta['name'] in [ 'command', 'env', 'option' ]:
return f'\\fB{man_escape(token.content)}\\fP'
elif token.meta['name'] in [ 'file', 'var' ]:
return f'\\fI{man_escape(token.content)}\\fP'
elif token.meta['name'] == 'manpage':
[page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ]
section = section[:-1]
return f'\\fB{man_escape(page)}\\fP\\fR({man_escape(section)})\\fP'
else:
raise NotImplementedError("md node not supported yet", token)
def attr_span_begin(self, token: Token, tokens: Sequence[Token], i: int) -> str:
# mdoc knows no anchors so we can drop those, but classes must be rejected.
if 'class' in token.attrs:
return super().attr_span_begin(token, tokens, i)
return ""
def attr_span_end(self, token: Token, tokens: Sequence[Token], i: int) -> str:
return ""
def heading_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
raise RuntimeError("md token not supported in manpages", token)
def heading_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
raise RuntimeError("md token not supported in manpages", token)
def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
# max item head width for a number, a dot, and one leading space and one trailing space
width = 3 + len(str(cast(int, token.meta['end'])))
self._list_stack.append(
List(width = width,
next_idx = cast(int, token.attrs.get('start', 1)),
compact = bool(token.meta['compact'])))
return self._maybe_parbreak()
def ordered_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
self._list_stack.pop()
return ""