forked from edsu/microdata
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
129 lines (95 loc) · 4.95 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
try:
import json
except ImportError:
import simplejson as json
import unittest
from microdata import get_items, Item, URI
class MicrodataParserTest(unittest.TestCase):
    """Exercise ``get_items`` against the HTML fixtures under ``test-data/``.

    The fixture paths are relative, so the tests must be run from the
    directory that contains ``test-data``.
    """

    def test_parse(self):
        """Parse a single Person item; check its properties and JSON form."""
        # parse the html for microdata
        with open('test-data/example.html') as f:
            items = get_items(f)

        # this html should have just one main item
        # NOTE: this must be assertEqual. assertTrue(len(items), 1) treats
        # the 1 as the failure message and passes for any non-empty result.
        self.assertEqual(len(items), 1)

        item = items[0]

        # item's type should be set
        self.assertEqual(item.itemtype, [URI("http://schema.org/Person")])

        # test simple case of a single valued property
        self.assertEqual(item.name, "Jane Doe")

        # but object properties can have multiple values ...

        # basic accessor returns the first value
        self.assertEqual(
            item.colleagues,
            URI("http://www.xyz.edu/students/alicejones.html"))

        # and get_all, well, gets them all of course :)
        self.assertEqual(
            item.get_all("colleagues"),
            [URI("http://www.xyz.edu/students/alicejones.html"),
             URI("http://www.xyz.edu/students/bobsmith.html")])

        # address should be another item
        self.assertTrue(isinstance(item.address, Item))
        self.assertEqual(item.address.itemtype,
                         [URI("http://schema.org/PostalAddress")])
        # compare the value; assertTrue would only check that it is truthy
        self.assertEqual(item.address.addressLocality, "Seattle")

        # <script> tag should be ignored in the content text
        self.assertFalse("Unrelated text" in item.address.streetAddress)

        # json
        i = json.loads(item.json())
        self.assertEqual(i["properties"]["name"][0], "Jane Doe")
        self.assertEqual(i["type"], ["http://schema.org/Person"])
        self.assertEqual(i["id"], "http://www.xyz.edu/~jane")
        address = i["properties"]["address"][0]
        self.assertTrue(isinstance(address, dict))
        self.assertEqual(address["properties"]["addressLocality"][0],
                         "Seattle")

    def test_parse_nested(self):
        """Parse an Event whose location and address are nested items."""
        event_name = "Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1)"

        # parse the html for microdata
        with open("test-data/example-nested.html") as f:
            items = get_items(f)

        # this html should have just one main item
        # (assertEqual, not assertTrue: see the note in test_parse's
        # equivalent check -- assertTrue never compares against the 1)
        self.assertEqual(len(items), 1)

        item = items[0]

        # item's type should be set
        self.assertEqual(item.itemtype, [URI("http://schema.org/Event")])

        # test case of a nested itemprop
        self.assertEqual(item.name.strip(), event_name)

        # test case of a nested itemscope
        self.assertTrue(isinstance(item.location, Item))
        self.assertEqual(item.location.itemtype,
                         [URI("http://schema.org/Place")])
        self.assertEqual(item.location.url, URI("wells-fargo-center.html"))

        # address should be a nested item
        self.assertTrue(isinstance(item.location.address, Item))
        self.assertEqual(item.location.address.itemtype,
                         [URI("http://schema.org/PostalAddress")])
        # compare the value; assertTrue would only check that it is truthy
        self.assertEqual(item.location.address.addressLocality,
                         "Philadelphia")

        # json
        i = json.loads(item.json())
        self.assertEqual(i["properties"]["name"][0].strip(), event_name)
        self.assertEqual(i["type"], ["http://schema.org/Event"])
        self.assertEqual(i["properties"]["url"],
                         ["nba-miami-philidelphia-game3.html"])
        location = i["properties"]["location"][0]
        self.assertTrue(isinstance(location, dict))
        self.assertEqual(location["properties"]["url"][0],
                         "wells-fargo-center.html")
        address = location["properties"]["address"][0]
        self.assertTrue(isinstance(address, dict))
        self.assertEqual(address["properties"]["addressLocality"][0],
                         "Philadelphia")

    def test_parse_unlinked(self):
        """An enclosed item without an itemprop link is a separate item."""
        with open("test-data/unlinked.html") as f:
            items = get_items(f)
        self.assertEqual(len(items), 2)

        i = items[0]
        self.assertEqual(i.itemtype, [URI("http://schema.org/Person")])
        self.assertEqual(i.name, "Jane Doe")
        self.assertEqual(i.streetAddress, None)

        # this PostalAddress is enclosed within the Person but it is
        # not linked via the streetAddress itemprop. This particular example
        # would represent a bug in the markup, but technically items can
        # appear within other items without them being related together
        # with an itemprop.
        i = items[1]
        self.assertEqual(i.itemtype, [URI("http://schema.org/PostalAddress")])
        self.assertTrue('Whitworth' in i.streetAddress)

    def test_skip_level(self):
        """The skip-level fixture yields exactly one item, named Jane Doe."""
        with open("test-data/skip-level.html") as f:
            items = get_items(f)
        self.assertEqual(len(items), 1)
        self.assertEqual(items[0].name, "Jane Doe")

    def test_parse_with_encoding_declared(self):
        """Parsing the encoding-declared fixture succeeds and finds no items."""
        with open('test-data/encoding-declared.html') as f:
            # Should not raise.
            items = get_items(f)
        self.assertEqual(len(items), 0)
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()