-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcrawler.py
157 lines (139 loc) · 4.67 KB
/
crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import os
import json
import requests
import csv
import shutil
def flatten_item(item):
    """Flatten one level of nesting in *item* so it fits a single CSV row.

    * A dict value is expanded into ``"<key>.<subkey>"`` entries (one level
      only; an empty dict contributes no entries).
    * A list value is stored as its JSON text.
    * Any other value is copied unchanged.

    Lists and dicts found *inside* a nested dict are stored as JSON text too,
    so every structured cell uses the same encoding instead of a mix of JSON
    and Python ``repr`` output.

    Args:
        item: Mapping of field name to value.

    Returns:
        A new flat dict; ``item`` itself is not modified.
    """
    flat_item = {}
    for key, value in item.items():
        if isinstance(value, dict):
            for subkey, subvalue in value.items():
                # Nested containers cannot be flattened further into the
                # fixed column set, so serialize them like top-level lists.
                if isinstance(subvalue, (dict, list)):
                    subvalue = json.dumps(subvalue)
                flat_item[f"{key}.{subkey}"] = subvalue
        elif isinstance(value, list):
            flat_item[key] = json.dumps(value)
        else:
            flat_item[key] = value
    return flat_item
def save_to_csv(items, filename):
    """Append *items* to the CSV file *filename*, one row per item.

    Each item is flattened with ``flatten_item`` and written using the fixed
    column order below. Columns missing from an item are left empty.

    Two robustness rules apply:

    * A header row is written first when the file does not exist yet or is
      empty, so a freshly created file is self-describing. A file that already
      has content is only appended to.
    * Keys that are not part of the column list are skipped. With the
      ``csv.DictWriter`` default they would raise ``ValueError`` and abort the
      whole run (for example an item whose ``to`` is ``null`` flattens to a
      plain ``to`` key, which is not a column).

    Args:
        items: Iterable of dicts (one per transaction).
        filename: Path of the CSV file to append to.
    """
    fieldnames = [
        'timestamp',
        'fee.type',
        'fee.value',
        'gas_limit',
        'block',
        'status',
        'method',
        'confirmations',
        'type',
        'exchange_rate',
        'to.ens_domain_name',
        'to.hash',
        'to.implementation_name',
        'to.is_contract',
        'to.is_verified',
        'to.metadata',
        'to.name',
        'to.private_tags',
        'to.public_tags',
        'to.watchlist_names',
        'tx_burnt_fee',
        'max_fee_per_gas',
        'result',
        'hash',
        'gas_price',
        'priority_fee',
        'base_fee_per_gas',
        'from.ens_domain_name',
        'from.hash',
        'from.implementation_name',
        'from.is_contract',
        'from.is_verified',
        'from.metadata',
        'from.name',
        'from.private_tags',
        'from.public_tags',
        'from.watchlist_names',
        'token_transfers',
        'tx_types',
        'gas_used',
        'created_contract',
        'position',
        'nonce',
        'has_error_in_internal_txs',
        'actions',
        'decoded_input',
        'decoded_input.method_id',
        'decoded_input.method_call',
        'decoded_input.parameters',
        'decoded_input.raw',
        'token_transfers_overflow',
        'raw_input',
        'value',
        'max_priority_fee_per_gas',
        'revert_reason',
        'revert_reason.method_id',
        'revert_reason.method_call',
        'revert_reason.parameters',
        'revert_reason.raw',
        'confirmation_duration',
        'tx_tag',
    ]
    # Decide before opening: append mode creates the file, which would hide
    # the "did not exist" case.
    needs_header = not os.path.exists(filename) or os.path.getsize(filename) == 0
    with open(filename, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=fieldnames, extrasaction='ignore'
        )
        if needs_header:
            writer.writeheader()
        for item in items:
            writer.writerow(flatten_item(item))
def get_data(api_url, timeout=30):
    """Fetch one page from *api_url* and return ``(next_page_params, items)``.

    Args:
        api_url: Full URL of the page to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would stall the
            crawl on a dead connection.

    Returns:
        A ``(next_page_params, items)`` tuple read from the JSON body.
        ``next_page_params`` defaults to ``{}`` and ``items`` to ``[]`` when
        the key is absent; a key that is present is returned as-is, so an
        explicit JSON ``null`` comes back as ``None``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response status.
        ValueError: If the response body is not valid JSON (``requests``
            raises a ``ValueError`` subclass in that case).
    """
    response = requests.get(api_url, timeout=timeout)
    # Fail loudly on HTTP errors: an error payload has neither "items" nor
    # "next_page_params" and would otherwise look like an empty last page.
    response.raise_for_status()
    data = response.json()
    next_page_params = data.get('next_page_params', {})
    items = data.get('items', [])
    return next_page_params, items
def write_params_to_file(params, filename):
    """Replace the contents of *filename* with the JSON text of *params*."""
    with open(filename, 'w') as handle:
        payload = json.dumps(params)
        handle.write(payload)
def append_params_to_file(params, filename):
    """Add the JSON text of *params* to the end of *filename* as one line."""
    with open(filename, 'a') as log:
        line = f"{json.dumps(params)}\n"
        log.write(line)
def check_file_size(filename, max_size_mb=50):
    """Report whether *filename* is larger than *max_size_mb* megabytes.

    Args:
        filename: Path of the file to measure.
        max_size_mb: Size limit in megabytes (1 MB = 1024 * 1024 bytes).
            Defaults to 50.

    Returns:
        ``True`` if the file exists and its size is strictly greater than the
        limit; ``False`` otherwise, including when the file does not exist.
    """
    if not os.path.exists(filename):
        return False
    # Convert the size from bytes to megabytes.
    file_size_mb = os.path.getsize(filename) / (1024 * 1024)
    return file_size_mb > max_size_mb
# Crawl script: resume paging through an address's transactions on the
# Blockscout API from the cursor stored in current_params.txt, appending every
# page to ./data/transaction.csv and rotating that file once it grows too big.

# File holding the pagination cursor to resume from.
current_params_file = "current_params.txt"
api_url_base = "https://optimism-sepolia.blockscout.com/api/v2/addresses/0x5c48ab8DFD7abd7D14027FF65f01887F78EfFE0F/transactions"
# Log of every cursor that has been scheduled, one JSON document per line.
called_params_file = "called_params.txt"
# Template copied into place whenever a fresh transaction file is started.
template_transaction_file = "transaction.csv"
transaction_file = "./data/transaction.csv"


def _build_api_url(params):
    """Return the page URL for the pagination parameters *params*."""
    from urllib.parse import urlencode

    if not params:
        return api_url_base
    # urlencode percent-escapes values, so characters such as '&', '+' or '#'
    # in a cursor value cannot corrupt the query string.
    return f"{api_url_base}?{urlencode(params)}"


def _next_archive_path():
    """Return a ``./data/transaction_<n>.csv`` path that does not exist yet."""
    index = len(os.listdir('./data')) + 1
    candidate = f"./data/transaction_{index}.csv"
    # The directory count alone is not guaranteed to be unique, and renaming
    # onto an existing file would silently replace it on POSIX systems.
    while os.path.exists(candidate):
        index += 1
        candidate = f"./data/transaction_{index}.csv"
    return candidate


def main():
    """Fetch pages until the API reports no further page, saving each one."""
    # Read the cursor to resume from.
    with open(current_params_file, 'r') as file:
        next_page_params = json.loads(file.read())
    print(next_page_params)
    if not isinstance(next_page_params, dict):
        raise ValueError(
            f"{current_params_file} must contain a JSON object, "
            f"got: {next_page_params!r}"
        )

    # Build the API URL from the information in current_params.txt.
    api_url = _build_api_url(next_page_params)
    while True:
        print("-------- API_URL --------")
        print(api_url)
        next_page_params, items = get_data(api_url)
        print("--------- Next Params")
        print(next_page_params)

        # Save the page BEFORE advancing the stored cursor: if saving fails,
        # a restart fetches the same page again instead of skipping it.
        save_to_csv(items, transaction_file)

        # Rotate the transaction file once it has grown past the size limit.
        if check_file_size(transaction_file):
            os.rename(transaction_file, _next_archive_path())
            # Copy the template and continue with a fresh file.
            shutil.copy(template_transaction_file, transaction_file)

        # No cursor for a following page means this was the last page. The
        # stored cursor is left untouched so the file stays valid JSON.
        if not next_page_params:
            break

        write_params_to_file(next_page_params, current_params_file)
        append_params_to_file(next_page_params, called_params_file)
        # Create the URL for the next page.
        api_url = _build_api_url(next_page_params)
        print("Calling params:", next_page_params)
    print("Done.")


if __name__ == "__main__":
    main()