Skip to content

Commit

Permalink
Merge pull request #56 from MartinPacker/OPML-import
Browse files Browse the repository at this point in the history
Opml import
  • Loading branch information
MartinPacker authored Jul 13, 2020
2 parents 6c3dc89 + dfd9d59 commit ba41645
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 33 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,12 @@ will merge any bullet whose text or note is 'A1' with its parent. The text of th

### Input Files

Input files can be in one of four formats:
Input files can be in one of five formats:

* A CSV file that is already in a format supported by iThoughts' Import function.
* A flat file where each line is a new node. Spaces and tabs can be used to indent the text. Here the level of indentation is used to control what level the line is added at.
* A Markdown nested list where each line is a new node. Spaces and tabs can be used to indent the text. Here the level of indentation is used to control what level the line is added at. Only an asterisk (`*`) followed by a space is supported as a list item marker.
* An OPML XML file - with or without `head` or `body` elements.
* An XML file, including one with namespaces (both default and named).

#### Nesting Level Detection
Expand Down
184 changes: 153 additions & 31 deletions filterCSV
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ import xml.etree.ElementTree as ElementTree

# from CSVTree import CSVTree

# Version level and date strings for filterCSV.
# NOTE(review): both the "1.4" and "1.5" pairs are present here, which looks
# like the old and new sides of a diff; as written, the later assignments
# ("1.5" / "13 July, 2020") are the ones that take effect.
filterCSV_level = "1.4"
filterCSV_date = "27 June, 2020"
filterCSV_level = "1.5"
filterCSV_date = "13 July, 2020"


class ParameterParser:
Expand Down Expand Up @@ -269,35 +269,31 @@ class TreeReader:
csvRows.append(newRow)

return self.ensureMandatoryColumns(csvRows) + (output,)

def readOPMLTree(self, tree):
    """Convert a parsed OPML element tree into CSV-style rows.

    The document may or may not contain ``head`` and ``body`` elements:

    * If the first child of ``tree`` is ``head``, the stripped text of its
      ``title`` element becomes the single level 0 row and every outline
      is read starting at level 1.
    * Otherwise there is no level 0 title row and outlines start at
      level 0.
    * Outlines are taken from the ``body`` element when it is present in
      the expected position (directly after ``head``, or first when there
      is no ``head``); otherwise from the children of ``tree`` itself.

    Args:
        tree: Root ``opml`` element (an ``ElementTree`` element).

    Returns:
        The tuple returned by ``self.ensureMandatoryColumns(csvRows)``
        extended with one extra item: the list of informational messages.
    """
    # Reset the named-namespace mapping for this document.
    # NOTE(review): presumably so later namespace handling sees an empty
    # mapping for OPML input - confirm against the rest of the class.
    self.XMLNamespaces = {}

    output = ["XML is specifically 'OPML'.\n"]

    # Materialise the children once so an empty <opml/> cannot raise
    # IndexError when the first child is inspected.
    children = list(tree)
    firstChild = children[0] if children else None

    haveHead = firstChild is not None and firstChild.tag == "head"
    bodyElement = None
    titleText = ""

    if haveHead:
        # Level 0 node is the contents of the title element within head.
        # Look the title up by tag instead of assuming it is head's first
        # child, and tolerate a missing or empty title.
        titleElement = firstChild.find("title")
        if titleElement is not None and titleElement.text:
            titleText = titleElement.text.strip()

        # `and` short-circuits, so children[1] is only touched when it
        # exists (the previous `&` evaluated both sides and raised
        # IndexError for a document containing only <head>).
        if len(children) > 1 and children[1].tag == "body":
            bodyElement = children[1]
    elif firstChild is not None and firstChild.tag == "body":
        bodyElement = firstChild

    # Header row: attribute columns followed by level0 .. level20.
    csvRows = [
        ["position", "colour", "shape", "level"]
        + [f"level{n}" for n in range(21)]
    ]

    if haveHead:
        csvRows.append(["", "", "", "0", titleText])

    if bodyElement is not None:
        # Outlines are the children of the body element.
        outlineNodes = list(bodyElement)
    elif haveHead:
        # No body: every top-level child except head is an outline.
        outlineNodes = [child for child in children if child.tag != "head"]
    else:
        # Neither head nor body: every top-level child is an outline.
        outlineNodes = children

    # With a title row occupying level 0, outlines start one level down.
    firstLevel = 1 if haveHead else 0
    for child in outlineNodes:
        csvRows += self._readOPMLTree(child, firstLevel)

    return self.ensureMandatoryColumns(csvRows) + (output,)

csvRows += self._readXMLTree(self.XMLTree, 0)
def _readOPMLTree(self, XMLNode, level):
    """Recursively convert one OPML element and its descendants to rows.

    Args:
        XMLNode: The element to convert; its ``text`` attribute supplies
            the node text.
        level: Nesting level of ``XMLNode`` (0 is the top level).

    Returns:
        A list of rows, ``XMLNode`` first and then its descendants in
        document order. Each row is four attribute cells (the first three
        empty, the fourth holding the level as a string), ``level`` empty
        cells, then the node text.
    """
    # Fall back to an empty cell when the element carries no "text"
    # attribute instead of raising KeyError and aborting the import.
    nodeText = XMLNode.attrib.get("text", "")

    # The blank padding puts the text in the column for this level
    # (the header columns after "level" are level0, level1, ...).
    nodeRow = ["", "", "", str(level)] + [""] * level + [nodeText]
    csvRows = [nodeRow]

    for child in XMLNode:
        csvRows += self._readOPMLTree(child, level + 1)

    return csvRows

def readXMLTree(self, inputFile):
    """Parse XML input lines into CSV-style rows.

    The lines are joined with newlines and parsed with ``ElementTree``;
    the root element is stored on ``self.XMLTree``. If the root tag is
    ``opml`` the work is delegated to ``self.readOPMLTree``. Otherwise the
    default and named namespace declarations are recorded on
    ``self.defaultXMLNamespace`` and ``self.XMLNamespaces``, and the tree
    is converted by ``self._readXMLTree`` starting at level 0.

    Args:
        inputFile: Iterable of text lines making up the XML document.

    Returns:
        For OPML, whatever ``self.readOPMLTree`` returns. Otherwise the
        tuple returned by ``self.ensureMandatoryColumns(csvRows)`` extended
        with one extra item: the list of informational messages.

    Raises:
        xml.etree.ElementTree.ParseError: If the text is not well-formed
            XML.
    """
    # Local import keeps this method self-contained; the module's import
    # block is not visible from here.
    import re

    output = []

    # Prepare the input text for namespace parsing and XML parsing
    XMLText = "\n".join(inputFile)

    # Create the XML parse tree
    self.XMLTree = ElementTree.fromstring(XMLText)

    # OPML is treated separately from other XML
    if self.XMLTree.tag == "opml":
        return self.readOPMLTree(self.XMLTree)

    # Hunt for the default namespace: the first xmlns="..." declaration.
    # Either quote style and optional whitespace around '=' are accepted,
    # as XML allows both.
    defaultMatch = re.search(
        r"""\bxmlns\s*=\s*(["'])(.*?)\1""", XMLText, re.DOTALL
    )
    if defaultMatch is None:
        output.append("No default namespace specification.")
        self.defaultXMLNamespace = ""
    else:
        self.defaultXMLNamespace = defaultMatch.group(2)
        output.append(f"Default namespace is '{self.defaultXMLNamespace}'")

    # Hunt for other namespaces: every xmlns:prefix="uri" declaration.
    # Matching the whole declaration means a stray "xmlns:" in text or a
    # comment is ignored instead of raising IndexError; when a prefix is
    # declared more than once the last declaration wins.
    self.XMLNamespaces = {
        prefix: uri
        for prefix, _quote, uri in re.findall(
            r"""\bxmlns:([^\s=]+)\s*=\s*(["'])(.*?)\2""", XMLText, re.DOTALL
        )
    }

    # Header row: attribute columns followed by level0 .. level20.
    csvRows = [
        ["position", "colour", "shape", "level"]
        + [f"level{n}" for n in range(21)]
    ]

    csvRows += self._readXMLTree(self.XMLTree, 0)

    return self.ensureMandatoryColumns(csvRows) + (output,)

def resolveNamespaces(self, textToEdit):
editedText = textToEdit.replace("{" + self.defaultXMLNamespace + "}", "")
Expand Down
2 changes: 1 addition & 1 deletion tests/iThoughts-OPML.opml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<opml version="1.0">
<head>
<title>
Central Idea
Central Idea - Really Level 0
</title>
</head>
<body>
Expand Down

0 comments on commit ba41645

Please sign in to comment.