-
Notifications
You must be signed in to change notification settings - Fork 0
/
etherscan_scrape.py
46 lines (40 loc) · 1.67 KB
/
etherscan_scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
"""Etherscan.io webscrape of block data
by Caleb Shack
"""
# imports
import time
from datetime import datetime
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
def _fetch_block_page(page, timeout):
    """Download one page of the Etherscan block list as a DataFrame.

    Args:
        page: 1-based page number placed in the ``p`` query parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The first HTML table of the page as a DataFrame with renamed columns
        plus ``extract_time`` and ``txn_link`` columns, or ``None`` (after
        printing a failure message) when the request errors out, the status
        code is not 200, or the page contains no ``<table>``.
    """
    # ps=100 presumably asks for 100 rows per page -- confirm against the site.
    url = f'https://etherscan.io/blocks?ps=100&p={page}'
    try:
        # requests never times out by default, so a stalled connection would
        # hang the whole scrape without an explicit timeout.
        block = requests.get(
            url,
            headers={
                'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'},
            timeout=timeout,
        )
    except requests.RequestException as err:
        print(f'!!!!!!!!!!!!page {page} failed with error {err}!!!!!!!!!!!!')
        return None

    code = block.status_code
    if code != 200:
        print(f'!!!!!!!!!!!!page {page} failed with status code {code}!!!!!!!!!!!!')
        return None

    soup = BeautifulSoup(block.content, 'html.parser')
    tbl = soup.find("table")
    if tbl is None:
        print(f'!!!!!!!!!!!!page {page} failed: no table found!!!!!!!!!!!!')
        return None

    # Passing literal HTML to read_html is deprecated (pandas >= 2.1);
    # a file-like wrapper works on every pandas version.
    df = pd.read_html(StringIO(str(tbl)))[0]
    df = df.rename(columns={'Block': 'block', 'Unnamed: 1': 'timestamp',
                            'Txn': 'txn', 'Uncles': 'uncles', 'Miner': 'miner',
                            'Gas Used': 'gas_used', 'Gas Limit': 'gas_limit',
                            'Base Fee': 'base_fee', 'Reward': 'reward',
                            'Burnt Fees (ETH)': 'burnt_fees'})
    df['extract_time'] = datetime.now()
    df['txn_link'] = df['block'].apply(lambda x: f'https://etherscan.io/txs?block={x}')
    print(f'page {page} successfully retrieved')
    return df


def runScrape(num_pages=19599, path='C:/...', delay=1, timeout=30):
    """Scrape Etherscan block-list pages and save them as a single CSV.

    Pages ``1..num_pages`` are fetched one at a time; pages that fail are
    reported on stdout and skipped, so one bad page does not abort the run.

    Args:
        num_pages: Number of pages to request; change for more or fewer blocks.
        path: Destination of the CSV file (the default is a placeholder that
            must be replaced with a real file path).
        delay: Seconds to sleep after each page request.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        The concatenated DataFrame of every page retrieved. When no page was
        retrieved, an empty DataFrame is returned and no file is written.
    """
    df_list = []
    for i in range(1, num_pages + 1):
        page_df = _fetch_block_page(i, timeout)
        if page_df is not None:
            df_list.append(page_df)
        time.sleep(delay)

    if not df_list:
        # pd.concat raises on an empty list; report and bail out instead.
        print('no pages were retrieved; nothing saved')
        return pd.DataFrame()

    block_df = pd.concat(df_list).reset_index(drop=True)
    # save as csv -- the combined frame, not just the last page fetched
    block_df.to_csv(path, index=False)
    return block_df
def _main():
    """Run the full scrape with default settings and report completion."""
    runScrape()
    print('scrape complete')


# Only scrape when executed as a script, never on import.
if __name__ == '__main__':
    _main()