forked from vladwulf/Yahoo-ticker-symbol-downloader
-
Notifications
You must be signed in to change notification settings - Fork 0
/
YahooTickerDownloader.py
executable file
·179 lines (147 loc) · 6.44 KB
/
YahooTickerDownloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env python
import pickle
from time import sleep
import argparse
import io
from ytd import SimpleSymbolDownloader
from ytd.downloader.GenericDownloader import GenericDownloader
from ytd.compat import text
from ytd.compat import csv
from ytd.compat import robotparser
import tablib
import sys
# User-agent value taken from SimpleSymbolDownloader; main() passes it to the
# robots.txt permission check before any download starts.
user_agent = SimpleSymbolDownloader.user_agent
# Registry of available downloaders keyed by the ticker type given on the
# command line. Each downloader instance is created once, at import time, and
# the same object is handed out by every lookup.
options = {
"generic": GenericDownloader()
}
def loadDownloader(tickerType):
    """Restore a previously saved downloader from ``<tickerType>.pickle``.

    The downloader registered under ``tickerType`` in ``options`` is looked
    up first, then the pickled state found in the current working directory
    is fed to its ``restore_state`` method.

    Args:
        tickerType: Key into ``options``; also the stem of the pickle file.

    Returns:
        The registered downloader object, after ``restore_state`` was called.

    Raises:
        KeyError: ``tickerType`` is not a key of ``options``.
        IOError/OSError: the pickle file cannot be opened.
        Any error raised by ``pickle.load`` or ``restore_state``.

    NOTE: ``pickle.load`` can execute arbitrary code while deserialising, so
    the pickle file must come from a trusted source (normally a previous run
    of this script).
    """
    restored = options[tickerType]
    pickle_path = tickerType + ".pickle"
    with open(pickle_path, "rb") as state_file:
        saved_state = pickle.load(state_file)
    restored.restore_state(saved_state)
    return restored
def saveDownloader(downloader, tickerType):
    """Persist the downloader's state to ``<tickerType>.pickle``.

    The state is written to a scratch file first and then renamed over the
    real checkpoint. This function is called repeatedly during a long
    download and again from the Ctrl-C handler, so a crash or interrupt in
    the middle of a write must not truncate the only resumable checkpoint.

    Args:
        downloader: Object exposing ``save_state()``; its return value is
            what gets pickled.
        tickerType: Stem of the pickle file written in the current working
            directory.

    Raises:
        Any error raised by ``save_state``, ``pickle.dump`` or the file
        system. In that case an existing checkpoint is left untouched.
    """
    import os  # local import: keeps this block self-contained

    downloader_data = downloader.save_state()
    target = tickerType + ".pickle"
    scratch = target + ".tmp"
    try:
        with open(scratch, "wb") as f:
            pickle.dump(downloader_data, file=f, protocol=pickle.HIGHEST_PROTOCOL)
        # os.replace overwrites an existing file on every platform
        # (Python 3.3+); fall back to os.rename on older interpreters.
        getattr(os, "replace", os.rename)(scratch, target)
    finally:
        # After a successful rename the scratch file no longer exists; it is
        # only still present when something above failed.
        if os.path.exists(scratch):
            try:
                os.remove(scratch)
            except OSError:
                pass  # best-effort cleanup; never mask the original error
def print_symbol(symbol):
    """Print one ticker symbol, indented by a single space.

    If the symbol cannot be converted or written because of a character
    encoding problem, a fixed notice is printed instead of raising.

    Args:
        symbol: Object converted to text with ``text(symbol)``.
    """
    try:
        print(" " + text(symbol))
    except UnicodeError:
        # Only encoding failures are tolerated. A bare ``except`` here would
        # also swallow KeyboardInterrupt and stop Ctrl-C from reaching the
        # handler that saves the download state.
        print (" Could not display some ticker symbols due to char encoding")
def downloadEverything(downloader, tickerType, insecure, sleeptime, pandantic):
    """Drive ``downloader`` until it reports that it is done.

    After every request the downloader state is written to disk with
    ``saveDownloader`` so an interrupted run can be resumed, and the loop
    pauses for ``sleeptime`` seconds between requests.

    Args:
        downloader: Object exposing ``isDone()``, ``nextRequest(...)``,
            ``printProgress()`` and a ``type`` attribute.
        tickerType: Passed to ``saveDownloader`` to name the state file.
        insecure: Forwarded unchanged to ``downloader.nextRequest``.
        sleeptime: Seconds to sleep between two requests.
        pandantic: Forwarded unchanged to ``downloader.nextRequest``.
    """

    def status_print(symbols):
        """Print a short preview of ``symbols`` followed by the progress.

        Handed to ``downloader.nextRequest`` as a callback; presumably it is
        invoked with the batch of symbols just downloaded (confirm against
        the downloader implementation).
        """
        print("Got " + str(len(symbols)) + " downloaded " + downloader.type + " symbols:")
        if len(symbols) <= 4:
            # Small batch (possibly empty): show every symbol.
            for s in symbols:
                print_symbol(s)
        else:
            # Large batch: show the first two and the last one only.
            print_symbol(symbols[0])
            print_symbol(symbols[1])
            print (" etc ...")
            print_symbol(symbols[-1])
        downloader.printProgress()

    while not downloader.isDone():
        downloader.nextRequest(status_print, insecure, pandantic)

        # Save the download state after every request, in case this
        # long-running job is suddenly interrupted.
        print ("Saving downloader to disk...")
        saveDownloader(downloader, tickerType)
        print ("Downloader successfully saved.")
        print ("")

        if not downloader.isDone():
            sleep(sleeptime)  # So we don't overload the server.
def _write_optional_export(path, label, render):
    """Best-effort write of one optional export format to ``path``.

    ``render`` is called first so that a rendering failure does not leave an
    empty or truncated file behind. Any ordinary error is reported on stdout
    and swallowed; KeyboardInterrupt and SystemExit still propagate.
    """
    try:
        payload = render()
        with open(path, 'wb') as f:
            f.write(payload)
    except Exception:
        print("Could not export " + label + " due to a internal error")


def _export_symbols(downloader, exchange):
    """Write the collected symbols to .csv, .xlsx, .json and .yaml files.

    Files are named after ``downloader.type`` and written to the current
    working directory. When ``exchange`` is not None, only symbols whose
    ``exchange`` attribute equals it are exported. A failure while writing
    the .csv propagates; the other three formats are best-effort.
    """
    print("Exporting "+downloader.type+" symbols")

    data = tablib.Dataset()
    data.headers = downloader.getRowHeader()

    for symbol in downloader.getCollectedSymbols():
        if exchange is None or symbol.exchange == exchange:
            data.append(symbol.getRow())

    with io.open(downloader.type + '.csv', 'w', encoding='utf-8') as f:
        # NOTE(review): the header line is joined by hand (no CSV quoting)
        # and terminated with '\n', while the rows go through csv.writer.
        f.write(text.join(u',', data.headers) + '\n')
        writer = csv.writer(f)
        for i in range(len(data)):
            row = [text(y) if y is not None else u"" for y in data[i]]
            writer.writerow(row)

    _write_optional_export(downloader.type + '.xlsx', ".xlsx",
                           lambda: data.xlsx)
    _write_optional_export(downloader.type + '.json', ".json",
                           lambda: data.json.encode('UTF-8'))
    _write_optional_export(downloader.type + '.yaml', ".yaml",
                           lambda: data.yaml.encode('UTF-8'))


def main():
    """Command-line entry point: download ticker symbols and export them.

    Parses the command line, resumes a saved downloader from
    ``<type>.pickle`` when one can be loaded, downloads until the downloader
    is done (unless ``--export`` was given) and finally exports the collected
    symbols.

    Returns:
        1 when robots.txt forbids fetching the search endpoint, otherwise
        None.

    Raises:
        SystemExit: with status 1 when the requested type is not a key of
            ``options``.
        Exception / KeyboardInterrupt: re-raised after the downloader state
            was saved to disk.
    """
    downloader = None

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--insecure", help="use HTTP instead of HTTPS", action="store_true")
    parser.add_argument("-e", "--export", help="export immediately without downloading (Only useful if you already downloaded something to the .pickle file)", action="store_true")
    parser.add_argument('-E', '--Exchange', help='Only export ticker symbols from this exchange (the filtering is done during the export phase)')
    parser.add_argument('type', nargs='?', default='generic', help='The type to download, this can be: '+" ".join(list(options.keys())))
    parser.add_argument("-s", "--sleep", help="The time to sleep in seconds between requests", type=float, default=0)
    parser.add_argument("-p", "--pandantic", help="Stop and warn the user if some rare assertion fails", action="store_true")

    args = parser.parse_args()

    protocol = 'http' if args.insecure else 'https'
    if args.insecure:
        print("Using insecure connection")

    if args.export:
        print("Exporting pickle file")

    tickerType = args.type = args.type.lower()

    print("Checking if we can resume a old download session")
    try:
        downloader = loadDownloader(tickerType)
        print("Downloader found on disk, resuming")
    except Exception:
        # Any ordinary failure (unknown type, missing or unreadable pickle)
        # means "start fresh". Ctrl-C and SystemExit are not swallowed.
        print("No old downloader found on disk")
        print("Starting a new session")
        if tickerType not in options:
            print("Error: " + tickerType + " is not a valid type option. See --help")
            sys.exit(1)
        else:
            downloader = options[tickerType]

    rp = None
    if not args.export:
        # robots.txt is only consulted before downloading, so a pure export
        # run does not need network access.
        rp = robotparser.RobotFileParser()
        rp.set_url(protocol + '://finance.yahoo.com/robots.txt')
        rp.read()

    try:
        if not args.export:
            if not rp.can_fetch(user_agent, protocol + '://finance.yahoo.com/_finance_doubledown/api/resource/searchassist'):
                print('Execution of script halted due to robots.txt')
                return 1

            if not downloader.isDone():
                print("Downloading " + downloader.type)
                print("")
                downloadEverything(downloader, tickerType, args.insecure, args.sleep, args.pandantic)
                print ("Saving downloader to disk...")
                saveDownloader(downloader, tickerType)
                print ("Downloader successfully saved.")
                print ("")
            else:
                print("The downloader has already finished downloading everything")
                print("")

    except Exception:
        print("A exception occurred while downloading. Suspending downloader to disk")
        saveDownloader(downloader, tickerType)
        print("Successfully saved download state")
        # The placeholder used to be printed literally; fill in the file stem.
        print("Try removing {type}.pickle file if this error persists".format(type=tickerType))
        print("Issues can be reported on https://github.com/Benny-/Yahoo-ticker-symbol-downloader/issues")
        print("")
        raise
    except KeyboardInterrupt:
        print("\nSuspending downloader to disk as .pickle file")
        saveDownloader(downloader, tickerType)
        raise

    if downloader.isDone() or args.export:
        _export_symbols(downloader, args.Exchange)
if __name__ == "__main__":
    # main() returns 1 when robots.txt halts the run; hand that value to
    # sys.exit so the process exit status reflects it (None maps to 0).
    sys.exit(main())