-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfpid_map.py
executable file
·61 lines (54 loc) · 7.71 KB
/
fpid_map.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python
from lxml import etree
import isbn
# Prefix of the XML API lookup URL; an identifier is appended to it to form
# the address that is fetched and parsed for each lookup.
url_base = 'http://techbus.safaribooksonline.com/xmlapi/?id='
# Identifiers to resolve.  Each entry is checked with isbn.valid() and then
# looked up against url_base by the loop further down in this script.
crazy_aliases = ['9780596518561', '9781565924642', '9781606492147', '9780596101435', '9780596002459', '9781565923911', '9780596003784', '9781606491300', '9780596002398', '9780596155285', '9780596000172', '9781606491867', '9780596008543', '9781565926165', '9780596005832', '9780596000189', '9780596001674', '9781565925007', '9780596002176', '9783866453166', '9780596003814', '9780596004606', '9780596006150', '9780596002350', '9780596005221', '9780596001605', '9780596002497', '9780596004583', '9780596004996', '9780596006075', '9781565927193', '9781565924499', '9781565924789', '9781565927469', '9781565929418', '9780596004699', '9780596006471', '9780596002787', '9780133359275', '9780596000301', '9780596001254', '9780596001520', '9780596002602', '9780596003173', '9781565927681', '9780596000783', '9780596008185', '9781565924857', '9780596000417', '9780596000608', '9781565923775', '9781565923478', '9780596008383', '9781449366834', '9780596006853', '9780596007676', '9780596526757', '9780596008987', '9780596003517', '9780596000738', '9780596000356', '9781565923256', '9780596100544', '9780596007751', '9781565925298', '9780596003999', '9780596009373', '9780596000127', '9780596005382', '9780596006587', '9780596005573', '9781565926653', '9780596002008', '9780596002138', '9780596007522', '9781565927094', '9780596009533', '9780596006266', '9781606492321', '9781565927056', '9780596004385', '9780596005252', '9781606492246', '9781904811404', '9781565925779', '9780596006242', '9781565926479', '9780596006945', '9781606493311', '9781565923850', '9780596003708', '9780596001186', '9780596001841', '9780735622579', '9781606492420', '9780596003098', '9780596005641', '9781565924697', '9781565924550', '9780596001162', '9781565926271', '9780596003388', '9780596527266', '9780596004729', '9781565925250', '9780596000080', '9780596002268', '9780596000165', '1572316217', '9780596009137', '9780596008574', '9780596007140', '9781565926615', '9781565921047', '9780596002169', '9781565928381', 
'9780596006006', '9780596007065', '9781606493076', '9781904811855', '9780596006662', '9780596007249', '9781565926103', '9781606491805', '9781457165955', '9780596006365', '9780596002435', '9780596002763', '9780596001797', '9780596003081', '9781606492468', '9780596004507', '9780596001889', '9781565924031', '9780596005368', '9780596002046', '9780596004668', '9781565924345', '9780596006211', '9780596002909', '9780596002619', '9780596008628', '9780596526726', '9781565922068', '9781565927131', '9780596001704', '9780596004002', '9780596005306', '9783868999112', '9780735639850', '9780596004590', '9781565922204', '9781609604875', '9780596000202', '9781565924505', '9780596001278', '9781565924666', '9780596001230', '9780596009007', '9780596007102', '9780596518387', '9781565923249', '9781565927148', '9780596003449', '9781565925366', '9780596002534', '9781565926813', '9781565925984', '9780596006631', '9780596000059', '9780596004705', '9781606491966', '9780596001292', '9780596003623', '9780596003838', '9780596008222', '9781565924895', '9780596005894', '9781565925786', '9780596001575', '9780596001971', '9780596003494', '9780596002657', '9780596003692', '9781565929470', '9781617290091', '9780596001193', '9781565924000', '9781565924949', '9780596004828', '9780596000714', '9780596005023', '9780596001179', '9781606492123', '9780596002725', '9780596000158', '9781565922693', '9780596000936', '9780596001285', '9780596003395', '9780596002374', '9781565926912', '9781565922921', '9780596000257', '9780596001308', '9780735623934', '9780596005115', '9780596004873', '9780596101190', '9780596008567', '9780596009625', '9780596007157', '9780596002305', '9780596007096', '9780596003579', '9780596001650', '9781606491560', '9781565926820', '9781565927209', '9780596007928', '9780596154868', '9781565926981', '9780596519230', '9780596006174', '9780596002572', '9780596007379', '9780124159532', '9780596003883', '9780596002695', '9780596001001', '9780596002756', '9781565924192', '9780596001896', 
'9780596101053', '9781565929432', '9781565927445', '9780596004477', '9780735625488', '9780596004903', '9780596006518', '9780596001711', '9781606492673', '9780596000998', '9781565928695', '9780596000448', '9780596102401', '9780596008635', '9781565926004', '9780596003722', '9780735670921', '9780596100667', '9781565927537', '9780596000844', '9780596001339', '9780596001438', '9780596000530', '9780596000929', '9781565924574', '9780596101428', '9780596000882', '9780596007287', '9780596003531', '9781565926707', '9781565923904', '9780596001773', '9781565926097', '9780596009427', '9780470384206', '9780596003289', '9780596000486', '9781565928527', '9780132550581', '9780596005559', '9780596008451', '9780596807641', '9781565921528', '9781565924154', '9781449394837', '9780596005269', '9781565926875', '9781565922822', '9781606493113', '9780596527464', '9780596001964', '9780596001612', '9781565925373', '9780596006068', '9780596003876', '9780596008772', '9781466619548', '9780596003487', '9780596009601', '9781565927186', '9781565922433', '9780596000967', '9781565928404', '9781565925151', '9781904811688', '9780596005429', '9780596008482', '9781565928565', '9780596002961', '9781565926424', '9781565928701', '9780596000790', '9781565927698', '9780596004040', '9780596003364', '9781565923539', '9780596001810', '9780596002367', '9780596154639', '9781565925793', '9780596008413', '9781565923799', '9781565926226', '9780596007164', '9780596003463', '9780132498272', '9780735651685', '9781565922860', '9780735619579', '9781565924321', '9781565928411', '9780596100346', '9780596005764', '9781457102738', '9781606491942', '9780596102098', '9781565927049', '9780596002565', '9780596005597', '9781565922891', '9780596001032', '9781565925168', '9781565924185', '9780596003609', '9780596003760', '9781565929425', '9780596007942', '9780596007584', '9780596004392', '9780596004644', '9780596001728', '9780596000981', '9780596001452', '9780596004897', '9781606492208', '9780596101787', '9781565927018', 
'9780596000851', '9780596006433']
def _lookup_fpid(identifier):
    """Return the book id the XML API reports for *identifier*, or ''.

    Fetches ``url_base + identifier``, parses the response with lxml and
    reads the ``id`` attribute found at ``/safari/book``.  The first match
    is returned directly rather than by un-formatting ``str(list)``, so a
    response with several matches (or a unicode result under Python 2) can
    no longer yield a mangled id.

    Any exception raised by ``etree.parse`` (unreachable URL, malformed
    XML) propagates to the caller.
    """
    tree = etree.parse(url_base + identifier)
    matches = tree.xpath('/safari/book/@id')
    return matches[0] if matches else ''


# Resolve every alias and write one "alias,fpid" line per resolved alias.
# Diagnostics go to stdout; three counters are printed at the end.
mapped = 0
total = 0
# The context manager closes the map file even if a lookup raises midway.
with open('/home/mhare/fpid.map', 'w') as f:
    for alias in crazy_aliases:
        total += 1
        if not isbn.valid(alias):
            print("invalid isbn " + alias)
            continue

        # Look the alias up twice: once through its isbn.convert() form and
        # once verbatim.
        # NOTE(review): isbn.convert presumably switches between the ISBN-10
        # and ISBN-13 spellings -- confirm against the isbn module.
        fpid_str13 = ''
        if len(alias) in (10, 13):
            fpid_str13 = _lookup_fpid(isbn.convert(alias))
        fpid_str = _lookup_fpid(alias)

        if alias == fpid_str:
            # The alias already is the id the API reports: print it, but do
            # not write a mapping line (so it is not counted as mapped).
            print(alias + " -> " + fpid_str)
            continue

        if fpid_str and fpid_str13 and fpid_str != fpid_str13:
            # Both lookups answered, but with different ids: refuse to pick.
            print("ERROR: inconsistent ISBN conversion " + alias)
            continue

        # At this point at most one distinct id is known; prefer the direct
        # lookup and fall back to the converted one.
        resolved = fpid_str or fpid_str13
        if not resolved:
            print("D " + alias + " -> " + "NONE")
            continue

        mapped += 1
        f.write(alias + "," + resolved + "\n")

# Summary: aliases seen, mapping lines written, and the remainder.
notmapped = total - mapped
print(total)
print(mapped)
print(notmapped)