-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path20190627_StockPrice.py
66 lines (58 loc) · 1.7 KB
/
20190627_StockPrice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import requests
import re
import bs4
from bs4 import BeautifulSoup
import pandas as pd
def getHTML(url, timeout=10):
    """Download *url* and return the response body as text.

    The response encoding is replaced by the encoding ``requests`` detects
    from the content (``apparent_encoding``) before the body is decoded.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so one
            unresponsive host cannot stall the caller forever.

    Returns:
        The decoded page text, or an empty string when the request fails
        or the server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no page"; Ctrl-C and
        # programming errors are no longer swallowed.
        print("False HTML!")
        return ""
def getList(bbb, ulist):
    """Collect ``[name, code]`` pairs from list items into *ulist*.

    Each item is expected to expose its text through a ``.string``
    attribute in the form ``name(code)``. Only codes whose first character
    is ``'3'`` or ``'6'`` are kept.

    Items with no text, or without a parenthesised code, are skipped
    instead of raising, and every item in *bbb* is examined rather than a
    fixed number of them.

    Args:
        bbb: Iterable of objects with a ``.string`` attribute.
        ulist: List that receives one ``[name, code]`` list per kept item;
            it is modified in place.
    """
    for item in bbb:
        text = item.string
        if not text:
            # .string is None/empty when there is no single text child.
            continue
        lef = text.find('(')
        if lef == -1:
            continue
        rig = text.find(')', lef + 1)
        if rig == -1:
            continue
        code = text[lef + 1:rig]
        if code.startswith(('3', '6')):
            ulist.append([text[:lef], code])
def getPrice(ulist):
    """Append scraped quote fields to every ``[name, code]`` row in *ulist*.

    For each row the quote page URL is built from the code's first
    character (``'6'`` -> ``sh``, ``'3'`` -> ``sz``); rows with any other
    prefix, or an empty code, are left untouched. The text of the first
    ``<strong>`` tag on the page (presumably the current price -- confirm
    against the page layout) and of each of its following siblings is
    appended to the row.

    When the page has no ``<strong>`` tag, which includes the case where
    the download failed and the HTML is empty, ``'False: '`` and the row
    index are printed and the row is left unchanged.

    Args:
        ulist: List of ``[name, code]`` lists; rows are extended in place.
    """
    base = 'https://gupiao.baidu.com/stock/'
    markets = {'6': 'sh', '3': 'sz'}
    for i, row in enumerate(ulist):
        code = row[1]
        # code[:1] is '' for an empty code, so it is skipped, not an error.
        market = markets.get(code[:1])
        if market is None:
            continue
        html = getHTML(base + market + code + '.html')
        soup = BeautifulSoup(html, 'html.parser')
        first = soup.strong
        if first is None:
            # Explicit check replaces the former bare ``except:``, which
            # also swallowed Ctrl-C and made the crawl uninterruptible.
            print('False: ', i)
            continue
        row.append(first.string)
        row.extend(sib.string for sib in first.find_next_siblings())
def main():
    """Scrape the stock list and per-stock quote data into an Excel file.

    Downloads the stock list page, takes the ``<li>`` entries inside the
    first ``<div class="quotebody">``, turns them into ``[name, code]``
    rows with ``getList``, extends each row with ``getPrice`` and writes
    the result to ``StockPrice.xlsx``.

    Raises:
        SystemExit: If the list page could not be downloaded or contains
            no ``quotebody`` div, so there is nothing to process.
    """
    url = 'http://quote.eastmoney.com/stock_list.html'
    uhtml = getHTML(url)
    soup = BeautifulSoup(uhtml, 'html.parser')
    quote_div = soup.find('div', {'class': 'quotebody'})
    if quote_div is None:
        # Fail with a clear message instead of an IndexError on an empty
        # result list.
        raise SystemExit('False stock list!')
    ulist = []
    getList(quote_div.find_all('li'), ulist)
    getPrice(ulist)
    ans = pd.DataFrame(ulist)
    ans.to_excel('StockPrice.xlsx')
# Entry point: the scrape runs whenever this file is executed or imported,
# because the call is not wrapped in a __main__ guard.
main()