-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWordParaNumbering.fs
72 lines (54 loc) · 3 KB
/
WordParaNumbering.fs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
namespace FrlUtils
open DocumentFormat.OpenXml.Packaging
open DocumentFormat.OpenXml.Wordprocessing
open System
open System.Collections.Generic
open System.Linq
module WordDocumentParser =

    /// Builds a lookup from a numbering instance's id (as an int) to the list of
    /// number-format strings of the abstract numbering definition it refers to.
    /// The list holds one entry per `Level` descendant, in document order, so it
    /// is indexed by position. Returns an empty dictionary when `numberingPart`
    /// (or its `Numbering` root) is null. Instances whose abstract definition
    /// cannot be found are skipped rather than raising.
    let private buildNumberingStyles (numberingPart: NumberingDefinitionsPart) =
        let styles = Dictionary<int, List<string>>()
        if not (isNull numberingPart) && not (isNull numberingPart.Numbering) then
            let numbering = numberingPart.Numbering
            for num in numbering.Elements<NumberingInstance>() do
                let hasIds =
                    not (isNull num.NumberID)
                    && not (isNull num.AbstractNumId)
                    && not (isNull num.AbstractNumId.Val)
                if hasIds then
                    // Key by the plain int so lookups made with
                    // `NumberingId.Val.Value` in extractText actually match.
                    let numId = num.NumberID.Value
                    let abstractNumId = num.AbstractNumId.Val.Value
                    let abstractNum =
                        numbering.Elements<AbstractNum>()
                        |> Seq.tryFind (fun a ->
                            not (isNull a.AbstractNumberId)
                            && a.AbstractNumberId.Value = abstractNumId)
                    match abstractNum with
                    | Some definition ->
                        let formats = List<string>()
                        for level in definition.Descendants<Level>() do
                            let hasFormat =
                                not (isNull level.NumberingFormat)
                                && not (isNull level.NumberingFormat.Val)
                            // Always add exactly one entry per level so positional
                            // indexing stays aligned; a level without an explicit
                            // format contributes an empty prefix.
                            if hasFormat then
                                formats.Add(level.NumberingFormat.Val.Value.ToString())
                            else
                                formats.Add("")
                        styles.[numId] <- formats
                    | None -> ()
        styles

    /// Concatenates the inner text of every direct `Paragraph` child of `body`,
    /// one per line (terminated by `Environment.NewLine`). Paragraphs that carry
    /// both a numbering id and a numbering level are prefixed with
    /// "<format><n>. ", where <format> is the format string registered for that
    /// id/level in `numberingStyles` and <n> is a running counter kept per level.
    /// No prefix is written when the id is unknown or the level is out of range.
    let private extractText (body: Body, numberingStyles: Dictionary<int, List<string>>) =
        let text = System.Text.StringBuilder()
        let currentNumbers = Dictionary<int, int>() // Level, CurrentNumber
        for para in body.Elements<Paragraph>() do
            let numberingProps =
                if not (isNull para.ParagraphProperties) then
                    para.ParagraphProperties.NumberingProperties
                else
                    null
            let isNumbered =
                not (isNull numberingProps)
                && not (isNull numberingProps.NumberingId)
                && not (isNull numberingProps.NumberingLevelReference)
            if isNumbered then
                let numId = numberingProps.NumberingId.Val.Value
                let levelId = numberingProps.NumberingLevelReference.Val.Value
                let next =
                    match currentNumbers.TryGetValue(levelId) with
                    | true, current -> current + 1
                    | _ -> 1
                currentNumbers.[levelId] <- next
                // Forget the counters of deeper levels so that their next item
                // starts again at 1. (Resetting them to 1 would make the next
                // item take the increment path above and come out as 2.)
                let deeperLevels =
                    currentNumbers.Keys
                    |> Seq.filter (fun k -> k > levelId)
                    |> Seq.toList
                for key in deeperLevels do
                    currentNumbers.Remove(key) |> ignore
                match numberingStyles.TryGetValue(numId) with
                | true, levels when levelId >= 0 && levelId < levels.Count ->
                    text.Append(sprintf "%s%d. " levels.[levelId] next) |> ignore
                | _ -> ()
            text.Append(para.InnerText).Append(Environment.NewLine) |> ignore
        text.ToString()

    /// Opens the Word document at `filePath` read-only and returns its paragraph
    /// text with numbering prefixes (see `extractText`). Returns the empty string
    /// when the package has no main document part, no document, or no body.
    /// Exceptions raised by `WordprocessingDocument.Open` are not caught here.
    let extractTextWithNumbering (filePath: string) =
        use wordDoc = WordprocessingDocument.Open(filePath, false)
        let mainPart = wordDoc.MainDocumentPart
        if isNull mainPart || isNull mainPart.Document || isNull mainPart.Document.Body then
            ""
        else
            let numberingStyles = buildNumberingStyles (mainPart.NumberingDefinitionsPart)
            extractText (mainPart.Document.Body, numberingStyles)