Skip to content

Commit

Permalink
added dev uncomtrade api #29
Browse files Browse the repository at this point in the history
  • Loading branch information
ouslan committed Oct 9, 2024
1 parent b3f3d23 commit e8a1804
Showing 1 changed file with 116 additions and 0 deletions.
116 changes: 116 additions & 0 deletions notebooks/comtrade.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
---
title: "Agriculture processing"
format:
  html:
    code-fold: true
jupyter: python3
---

```{python}
import os

# Step up one directory so that the relative paths used by the following
# cells ("data/", the `src` package) resolve from the parent of the
# notebook's folder.
os.chdir(os.pardir)
```

```{python}
import polars as pl
import pandas as pd
import time
import requests
import comtradeapicall
from src.jp_imports.data_process import DataProcess
# Shared project data-processing helper used by the cells below.
# NOTE(review): "data/" is presumably the directory it reads/writes, relative
# to the working directory set by the earlier chdir cell — confirm against
# DataProcess.
d = DataProcess("data/")
```

```{python}
# One-off smoke test of the Comtrade preview endpoint: a single monthly
# (freqCode='M'), HS-classified request for period 202403 and partner code
# 584, limited to the first chunk of commodity codes.
# NOTE: this cell reads `chunks`, which is built by a later cell in this
# notebook — run that cell first.
mydf = comtradeapicall.previewFinalData(
    typeCode='C',
    freqCode='M',
    clCode='HS',
    period='202403',
    reporterCode='',
    cmdCode=','.join(chunks[0]),
    flowCode='X',  # was 'Xx'; aligned with the bulk download loop below
    partnerCode='584',
    partner2Code=None,
    customsCode=None,
    motCode=None,
    maxRecords=500,
    format_output='JSON',
    aggregateBy=None,
    breakdownMode=None,
    countOnly=None,
    includeDesc=True,
)
# Use an identity check: `mydf == None` on a pandas DataFrame is evaluated
# element-wise, and using that result as an `if` condition raises ValueError.
# A None result is treated here as the API refusing the request.
if mydf is None:
    print("hit rate limit")
```

```{python}
# Build the list of commodity-code chunks that the download cells iterate.
#
# The frequency string is stored in `frequency`, not `time`: rebinding the
# name `time` to a string would shadow the `time` module imported earlier and
# break every `time.sleep(...)` call in this notebook.
frequency = "yearly"
types = "hs"
df = d.process_int_org(frequency, types, False).collect()

# Unique, sorted 4-character prefixes of the "hs" column.
codes = (
    df.select(pl.col("hs").str.slice(0, 4))
    .unique()
    .sort(by="hs")
    .to_series()
    .to_list()
)

# Divide the codes into non-overlapping chunks of 20. The slice width and the
# step must be equal; a wider slice than the step makes consecutive chunks
# overlap, so the same codes would be requested several times.
chunk_size = 20
chunks = [codes[i:i + chunk_size] for i in range(0, len(codes), chunk_size)]

# Pause kept from the original cell (NOTE(review): purpose is not evident
# from this file — presumably spacing out API calls; confirm or remove).
time.sleep(6)
print(f"Total chunks: {len(chunks)}")
```

```{python}
# Download monthly Comtrade preview data for every (year, month, chunk)
# combination from 2010-01 through 2024-09, keep a fixed set of columns as
# strings, and write the combined table to data/master_df.csv.
#
# Reads `chunks` (lists of commodity codes) built by an earlier cell.

# Imported locally so the retry pause works even if the name `time` has been
# rebound to something else in the notebook session.
from time import sleep

# Single source of truth for the output schema and the column selection.
columns = [
    "refYear",
    "refMonth",
    "reporterCode",
    "reporterDesc",
    "flowCode",
    "flowDesc",
    "partnerDesc",
    "classificationCode",
    "cmdCode",
    "cmdDesc",
    "cifvalue",
    "fobvalue",
    "primaryValue",
    "netWgt",
]
empty_df = [pl.Series(name, [], dtype=pl.String) for name in columns]
master_df = pl.DataFrame(empty_df)

# Collect the per-request frames and concatenate once at the end; growing
# `master_df` with pl.concat inside the loop recopies all accumulated rows on
# every iteration. The empty frame is kept first so the schema is preserved
# even when no request returns data.
frames = [master_df]

for year in range(2010, 2025):
    for month in range(1, 13):
        # Stop at 2024-09.
        if year == 2024 and month >= 10:
            continue
        period_code = f"{year}{str(month).zfill(2)}"
        for chunk in chunks:
            # Retry the same request until the API returns something other
            # than None.
            while True:
                mydf = comtradeapicall.previewFinalData(
                    typeCode='C',
                    freqCode='M',
                    clCode='HS',
                    period=period_code,
                    reporterCode='',
                    cmdCode=','.join(chunk),
                    flowCode='X',
                    partnerCode='584',
                    partner2Code=None,
                    customsCode=None,
                    motCode=None,
                    maxRecords=500,
                    format_output='JSON',
                    aggregateBy=None,
                    breakdownMode=None,
                    countOnly=None,
                    includeDesc=True,
                )
                if mydf is None:
                    # None is treated as a rate-limit response: wait, retry.
                    print("Rate limit reached. Waiting to try again...")
                    sleep(60)
                    continue
                if mydf.empty:
                    print(f"No data returned for {year}-{month}, chunk: {chunk}")
                elif len(mydf) == 500:
                    # Row count equals maxRecords, so the result was probably
                    # truncated; it is reported but still kept below.
                    print(f"Error: {year}-{month} {chunk}, {len(mydf)} rows")
                break

            # Keep only non-empty results, normalised to all-string columns.
            if len(mydf) > 0:
                tmp = pl.from_pandas(mydf[columns]).cast(pl.String)
                frames.append(tmp)
                print(f"Processed {year}-{str(month).zfill(2)}, {len(tmp)} rows")

master_df = pl.concat(frames, how="vertical")
# polars DataFrames are written with write_csv; there is no to_csv method.
master_df.write_csv("data/master_df.csv")
```

0 comments on commit e8a1804

Please sign in to comment.