-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy patheslao_nsf.py
90 lines (45 loc) · 899 Bytes
/
eslao_nsf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>
# <markdowncell>
# This was my first attempt at scraping data from each individual NSF grant (award) page.
# <codecell>
from urllib2 import *
# <codecell>
# Common prefix of the award-detail URLs built below; an award ID string is
# appended to it.
BASE_URL = 'http://www.nsf.gov/awardsearch/showAward?AWD_ID='
# A single sample award number to experiment with.
award = 1538
# <codecell>
type(award)
# <codecell>
# Zero-pad the number to seven characters before putting it in the URL.
award_str = str(award).zfill(7)
# <codecell>
award_str
# <codecell>
# Fixed: this line used to reference the undefined name `award_key`, which
# raised NameError; the padded ID computed above is `award_str`.
url = BASE_URL + award_str
# <codecell>
award_str
# <codecell>
url
# <codecell>
range(10)
# <codecell>
awards = range(10)
# <codecell>
# Moved below the assignment: inspecting `awards` before it exists raised
# NameError when the script was run top to bottom.
type(awards)
# <codecell>
awards
# <codecell>
# Replace the integers with zero-padded ID strings for the first ten numbers.
awards = [str(a).zfill(7) for a in range(10)]
# <codecell>
awards
# <codecell>
# One award-page URL per padded ID.
urls = [BASE_URL + a for a in awards]
# <codecell>
urls
# <codecell>
urls[1]
# <codecell>
# Fetch one page as a smoke test. The response is closed explicitly so the
# connection is released even if read() fails.
response = urlopen(urls[5])
try:
    record = response.read()
finally:
    response.close()
# <codecell>
record[:5]
# <codecell>
record[-9999:]
# <codecell>