-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.py
133 lines (109 loc) · 5.21 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# ############################################################################
# Repertory converter / parser / importer for OOREP (https://www.oorep.com/)
# Copyright (c) 2021 Andreas Bauer <[email protected]>
# License: GPL v3.0 (or higher)
from parsita import *
# Types ######################################################################
class Remedy:
def __init__(self, abbrev: str, weight: int):
self.abbrev = abbrev.capitalize()
self.weight = weight
def __str__(self):
if self.weight > 1:
return self.abbrev + " (" + str(self.weight) + ")"
else:
return self.abbrev
def __repr__(self):
return str(self)
class Rubric:
index = -1
parentIndex = -1
fullPath = []
def __init__(self, text: str, depth: int, remedies: list[Remedy]):
self.text = text
self.depth = depth
self.remedies = remedies
def __str__(self):
if len(self.remedies) > 0:
remedies = ""
for remedy in self.remedies:
remedies += str(remedy) + " "
return (self.depth * " ") + self.text + ": " + remedies.rstrip()
else:
return (self.depth * " ") + self.text
def __repr__(self):
return str(self)
def getParentPathFromParentIndices(self, indices: list[int], allRubrics):
fullPath = []
for parent in indices:
fullPath.append(allRubrics[parent].text)
return fullPath[::-1]
def getParentIndices(self, rubric, indices: list[int], allRubrics):
if rubric.parentIndex < 0:
return indices
else:
indices.append(rubric.parentIndex)
return rubric.getParentIndices(allRubrics[rubric.parentIndex], indices, allRubrics)
def getFullPath(self, allRubrics):
path = self.getParentPathFromParentIndices(self.getParentIndices(self, [], allRubrics), allRubrics)
path.append(self.text)
return path
# Parsers ####################################################################
class RemedyLexer(TextParsers, whitespace=None):
abbrev = reg(r'[A-Z](-?[a-z]+)+\.?')
abbrevFourValued = reg(r'[A-Z](-?[A-Z]+)+\.?')
weight = reg(r'[0-9]+') > int
weightBrackets = '(' >> weight << ')'
class RemedyParser(TextParsers, whitespace=None):
remedyFourValued = RemedyLexer.abbrevFourValued > (lambda x: Remedy(x, 4))
remedyNoWeight = RemedyLexer.abbrev > (lambda x: Remedy(x, 1))
remedyWithWeight = RemedyLexer.abbrev << reg(r'[ \t]*') & RemedyLexer.weightBrackets > (lambda x: Remedy(x[0], x[1]))
remedy = remedyWithWeight | remedyFourValued | remedyNoWeight
class RemediesParser(TextParsers, whitespace=None):
remedies = repsep(RemedyParser.remedy, reg(r',?[ \t]+'))
class RubricTextParser(TextParsers, whitespace=None):
rubricRawString = reg(r'[A-Za-z\-äöüßÄÖÜ0-9 \.,;\(\)]+[A-Za-zäöüßÄÖÜ0-9\(\)\.]')
rubricDepth = reg(r'=+') > (lambda x: int(len(x)))
rubricText = rubricDepth & reg(r'[ \t]*') >> rubricRawString
class RubricParser(TextParsers, whitespace=None):
emptyRubric = RubricTextParser.rubricText << reg(r'(\.|:)?[ \t]*$') > (lambda x: Rubric(x[1], x[0], []))
nonEmptyRubric = RubricTextParser.rubricText << ':' & reg(r'[ \t]*') >> RemediesParser.remedies > (lambda x: Rubric(x[0][1], x[0][0], x[1]))
rubric = nonEmptyRubric | emptyRubric
# The actual parser to call from the outside #################################
# Returns a list of type list[Rubric]
def getAllRubricsFromFileContents(lines: list[str]):
rubricIndex = 0 # an index that will be incr. and added to every parsed rubric
resultRubrics = [] # where the parsed rubrics gonna end up in
parents = []
for line in lines:
if len(line) > 0:
result = RubricParser.rubric.parse(line)
if isinstance(result, Failure):
print(result)
exit(1)
else:
rubric = result.value
rubric.index = rubricIndex
# Adjust parents-array to link all rubrics transitively back to the chapter/root rubric node
if rubric.depth == 1:
parents = [(rubric.depth, rubric.index)]
# Subrubrics must only be at most one level deeper than parent.
elif rubric.depth == parents[-1][0] + 1:
rubric.parentIndex = parents[-1][1]
parents.append((rubric.depth, rubric.index))
elif rubric.depth == parents[-1][0]:
del parents[-1]
rubric.parentIndex = parents[-1][1]
parents.append((rubric.depth, rubric.index))
elif rubric.depth < parents[-1][0]:
depthDifference = parents[-1][0] - rubric.depth + 1
for i in range(depthDifference):
del parents[-1]
rubric.parentIndex = parents[-1][1]
parents.append((rubric.depth, rubric.index))
else:
print("ERROR: Subrubrics must only be at most one level deeper than parent. Check out line: " + rubric.text)
exit(1)
resultRubrics.append(rubric)
rubricIndex += 1
return resultRubrics