-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodule.rs
154 lines (133 loc) · 4.31 KB
/
module.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//
use std::{
fmt::{self, Display},
path::Path,
};
use anyhow::{Context, Result};
use scraper::{ElementRef, Html, Selector};
use serde::Serialize;
use url::{self, Url};
/// Parser that extracts a module's name and items from an HTML document.
///
/// The CSS selectors and the base URL are stored here so that they are built
/// once (in `new`) and reused for every document passed to `parse_module`.
#[derive(Debug)]
pub struct ModuleParser {
// Selector matching the elements considered as item candidates
// (built from `ITEM_SELECTOR` in `new`).
items: Selector,
// Selector matching the document's `<title>`, which is read as the module name.
title: Selector,
// Base URL against which `href` attributes are resolved and relativized.
base_url: Url,
}
impl ModuleParser {
pub fn new() -> ModuleParser {
const ITEM_SELECTOR: &str = r"
.Agda .Function ,
.Agda .Datatype ,
.Agda .InductiveConstructor ,
.Agda .CoinductiveConstructor ,
.Agda .Record ,
.Agda .Field
";
ModuleParser {
items: Selector::parse(ITEM_SELECTOR).expect("item selector"),
title: Selector::parse("html title").expect("title selector"),
base_url: Url::parse("http://invalid./").expect("arbitrary base URL"),
}
}
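// NOTE: the constructor below is disabled and kept for reference only. It
// calls `ModuleParser::new(base_url)`, whereas `new` takes no arguments, so
// it does not compile as written.
//
// pub fn from_path(path: &Path) -> Option<ModuleParser> {
// let base_path = path.parent()?;
// let base_url = Url::parse(&format!("file://{}/", base_path.display())).ok()?;
// Some(ModuleParser::new(base_url))
// }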
/// Splits a link target into the anchor ID and the module it points to.
///
/// The ID is the fragment of `target`. The module name is the stem of the
/// file name of `target` expressed relative to the parser's base URL, e.g.
/// `Foo.Bar.html#42` yields `("42", "Foo.Bar")`.
///
/// # Errors
///
/// Fails if `target` has no fragment, cannot be expressed relative to the
/// base URL, or has no usable file name.
fn parse_target_item(&self, target: &Url) -> Result<(String, String)> {
    let id = target.fragment().context("No target ID")?;
    let relative = self
        .base_url
        .make_relative(target)
        .context("Not a valid URL relative to base URL")?;

    // `make_relative` keeps the query and the fragment. Cut them off before
    // looking at the file name, otherwise a `.` inside them would be taken
    // for the extension separator and end up in the module name.
    let file = relative
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(relative.as_str());

    // NOTE(review): the path presumably comes back percent-encoded for
    // non-ASCII file names, in which case it will not compare equal to the
    // module name read from the title — confirm against such a module.
    let module = Path::new(file)
        .file_stem()
        .context("No file stem found")?
        .to_str()
        .context("File name is not a valid module name")?;

    Ok((id.into(), module.into()))
}
/// Turns one candidate element into an [`Item`] if it is a definition of
/// the module `module_name`.
///
/// Returns `Ok(Some(_))` when the element has no `href`, or when its `href`
/// points back at the element itself (same ID, same module). Returns
/// `Ok(None)` when the element links to something else.
///
/// # Errors
///
/// Fails if the element has no text or no ID, if its `href` cannot be
/// parsed with `url_parser`, or if the link target cannot be split into an
/// ID and a module name.
fn parse_item(
    &self,
    item: ElementRef,
    module_name: &str,
    url_parser: url::ParseOptions,
) -> Result<Option<Item>> {
    let label = item.text().next().context("Missing text")?;
    let node = item.value();
    let anchor = node.id().context("Missing ID")?;

    // Both "keep" outcomes produce the same item, so build it in one place.
    let definition = || Item {
        id: anchor.into(),
        identifier: label.into(),
    };

    let href = match node.attr("href") {
        Some(href) => href,
        // Some items are anchors but do not point anywhere.
        // Assume that these are definitions like `Y` in
        //
        //     import Foo.Bar renaming (X to Y)
        //
        // and return them anyways:
        None => return Ok(Some(definition())),
    };

    let target = url_parser.parse(href).context("Invalid link target")?;
    let (target_id, target_module) = self.parse_target_item(&target)?;

    // Only a link that points at the element itself marks a definition;
    // anything else is a reference to an item defined elsewhere.
    let defined_here = anchor == target_id && module_name == target_module;
    Ok(if defined_here {
        Some(definition())
    } else {
        None
    })
}
/// Parses an HTML document into a [`Module`].
///
/// The module name is the text of the document's `<title>`. The items are
/// the elements matched by the item selector that `parse_item` accepts as
/// definitions of this module.
///
/// Elements for which `parse_item` fails are skipped, and a warning is
/// printed to standard error for each of them.
///
/// # Errors
///
/// Fails if the document has no `<title>` element.
pub fn parse_module(&self, content: &str) -> Result<Module> {
    let url_parser = Url::options().base_url(Some(&self.base_url));
    let document = Html::parse_document(content);

    // Read the title as text rather than as serialized markup, so that
    // characters such as `&` or `<` are not returned as HTML entities.
    let name: String = document
        .select(&self.title)
        .next()
        .map(|title| title.text().collect())
        .context("No module name")?;

    let items = document
        .select(&self.items)
        .filter_map(|item| match self.parse_item(item, &name, url_parser) {
            Ok(item) => item,
            Err(err) => {
                // Best effort: one malformed element must not fail the module.
                eprintln!("Warning: Skipping item ({})", err);
                None
            }
        })
        .collect();

    Ok(Module { name, items })
}
}
/// A single definition found in a module's HTML document.
#[derive(Debug, Serialize)]
pub struct Item {
// Value of the element's `id` attribute.
pub id: String,
// First text node of the element, i.e. the name as written in the document.
pub identifier: String,
}
/// Formats an item as `identifier#id`.
impl Display for Item {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Emit the three pieces one after another instead of going through
        // a format string; the output is the same.
        f.write_str(&self.identifier)?;
        f.write_str("#")?;
        f.write_str(&self.id)
    }
}
/// A parsed module: its name and the items found in its document.
#[derive(Debug, Serialize)]
pub struct Module {
// Module name, as read from the document's `<title>`.
pub name: String,
// Items collected by `ModuleParser::parse_module`, in document order.
pub items: Vec<Item>,
}
impl Display for Module {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(&self.name)
}
}