Skip to content

Commit

Permalink
📊 democracy: lexical index (#2634)
Browse files Browse the repository at this point in the history
* snapshot

* wip

* avoid notebook creation

* clean step + metadata

* typo

* wip

* change version to use shared tools

* refine implemented

* cache only 1 entry

* add ttl

* tweaks

* wip

* working grapher import

* minor fixes

* pre 1800 for numbers

* minor fixes

* extend czechoslovakia, fix indicator title

* change starting year for serbia, bug in years for czechia/slovakia

* distribute ussr population in Asia/Europe

* impute some values

* tweaks

* add note

* add data former members of czechoslovakia and yugoslavia
  • Loading branch information
lucasrodes authored May 13, 2024
1 parent 7054e8b commit 4784218
Show file tree
Hide file tree
Showing 11 changed files with 1,391 additions and 4 deletions.
2 changes: 1 addition & 1 deletion apps/wizard/pages/indicator_upgrade/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def get_schema() -> Dict[str, Any]:
return schema


@st.cache_data
@st.cache_data(max_entries=1, ttl=60 * 10)
def get_indicators_from_datasets(
dataset_id_1: int, dataset_id_2: int, show_new_not_in_old: int = False
) -> Tuple[pd.DataFrame, pd.DataFrame]:
Expand Down
10 changes: 10 additions & 0 deletions dag/democracy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,13 @@ steps:
- data://garden/demography/2023-03-31/population
data://grapher/democracy/2024-05-01/ert:
- data://garden/democracy/2024-03-07/ert

# Lexical Index (2023)
data://meadow/democracy/2024-05-09/lexical_index:
- snapshot://democracy/2024-05-09/lexical_index.xlsx
data://garden/democracy/2024-03-07/lexical_index:
- data://meadow/democracy/2024-05-09/lexical_index
- data://garden/regions/2023-01-01/regions
- data://garden/demography/2023-03-31/population
data://grapher/democracy/2024-05-09/lexical_index:
- data://garden/democracy/2024-03-07/lexical_index
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
{
"Afghanistan": "Afghanistan",
"Albania": "Albania",
"Algeria": "Algeria",
"Andorra": "Andorra",
"Angola": "Angola",
"Antigua and Barbuda": "Antigua and Barbuda",
"Argentina": "Argentina",
"Armenia": "Armenia",
"Australia": "Australia",
"Austria": "Austria",
"Austria-Hungary": "Austria-Hungary",
"Azerbaijan": "Azerbaijan",
"Baden": "Grand Duchy of Baden",
"Bahamas": "Bahamas",
"Bahrain": "Bahrain",
"Bangladesh": "Bangladesh",
"Barbados": "Barbados",
"Bavaria": "Kingdom of Bavaria",
"Belarus": "Belarus",
"Belgium": "Belgium",
"Belize": "Belize",
"Benin": "Benin",
"Bhutan": "Bhutan",
"Bolivia": "Bolivia",
"Bosnia and Herzegovina": "Bosnia and Herzegovina",
"Botswana": "Botswana",
"Brazil": "Brazil",
"Brunei": "Brunei",
"Bulgaria": "Bulgaria",
"Burkina Faso": "Burkina Faso",
"Burundi": "Burundi",
"Cambodia": "Cambodia",
"Cameroon": "Cameroon",
"Canada": "Canada",
"Cape Verde": "Cape Verde",
"Central African Republic": "Central African Republic",
"Chad": "Chad",
"Chile": "Chile",
"China": "China",
"Colombia": "Colombia",
"Comoros": "Comoros",
"Costa Rica": "Costa Rica",
"Cote d'Ivoire": "Cote d'Ivoire",
"Croatia": "Croatia",
"Cuba": "Cuba",
"Cyprus": "Cyprus",
"Czech Republic": "Czechia",
"Czechoslovakia": "Czechoslovakia",
"Denmark": "Denmark",
"Djibouti": "Djibouti",
"Dominica": "Dominica",
"Dominican Republic": "Dominican Republic",
"East Timor": "East Timor",
"Ecuador": "Ecuador",
"Egypt": "Egypt",
"El Salvador": "El Salvador",
"Equatorial Guinea": "Equatorial Guinea",
"Eritrea": "Eritrea",
"Estonia": "Estonia",
"Ethiopia": "Ethiopia",
"Fiji": "Fiji",
"Finland": "Finland",
"France": "France",
"Gabon": "Gabon",
"Gambia": "Gambia",
"Georgia": "Georgia",
"Germany": "Germany",
"Germany, East": "East Germany",
"Germany, West": "West Germany",
"Ghana": "Ghana",
"Greece": "Greece",
"Grenada": "Grenada",
"Guatemala": "Guatemala",
"Guinea": "Guinea",
"Guinea-Bissau": "Guinea-Bissau",
"Guyana": "Guyana",
"Haiti": "Haiti",
"Honduras": "Honduras",
"Hungary": "Hungary",
"Iceland": "Iceland",
"India": "India",
"Indonesia": "Indonesia",
"Iran": "Iran",
"Iraq": "Iraq",
"Ireland": "Ireland",
"Israel": "Israel",
"Italy": "Italy",
"Jamaica": "Jamaica",
"Japan": "Japan",
"Jordan": "Jordan",
"Kazakhstan": "Kazakhstan",
"Kenya": "Kenya",
"Kiribati": "Kiribati",
"Kosovo": "Kosovo",
"Kuwait": "Kuwait",
"Kyrgyzstan": "Kyrgyzstan",
"Laos": "Laos",
"Latvia": "Latvia",
"Lebanon": "Lebanon",
"Lesotho": "Lesotho",
"Liberia": "Liberia",
"Libya": "Libya",
"Liechtenstein": "Liechtenstein",
"Lithuania": "Lithuania",
"Luxembourg": "Luxembourg",
"Macedonia": "North Macedonia",
"Madagascar": "Madagascar",
"Malawi": "Malawi",
"Malaysia": "Malaysia",
"Maldives": "Maldives",
"Mali": "Mali",
"Malta": "Malta",
"Marshall Islands": "Marshall Islands",
"Mauritania": "Mauritania",
"Mauritius": "Mauritius",
"Mexico": "Mexico",
"Micronesia": "Micronesia (country)",
"Modena": "Duchy of Modena and Reggio",
"Moldova": "Moldova",
"Monaco": "Monaco",
"Mongolia": "Mongolia",
"Montenegro": "Montenegro",
"Morocco": "Morocco",
"Mozambique": "Mozambique",
"Myanmar": "Myanmar",
"Namibia": "Namibia",
"Nauru": "Nauru",
"Nepal": "Nepal",
"Netherlands": "Netherlands",
"New Zealand": "New Zealand",
"Nicaragua": "Nicaragua",
"Niger": "Niger",
"Nigeria": "Nigeria",
"Norway": "Norway",
"Oman": "Oman",
"Orange Free State": "Orange Free State",
"Pakistan": "Pakistan",
"Palau": "Palau",
"Panama": "Panama",
"Papua New Guinea": "Papua New Guinea",
"Paraguay": "Paraguay",
"Parma": "Duchy of Parma and Piacenza",
"Peru": "Peru",
"Philippines": "Philippines",
"Poland": "Poland",
"Portugal": "Portugal",
"Qatar": "Qatar",
"Romania": "Romania",
"Russia": "Russia",
"Rwanda": "Rwanda",
"Samoa": "Samoa",
"San Marino": "San Marino",
"Sao Tome and Principe": "Sao Tome and Principe",
"Sardinia": "Kingdom of Sardinia",
"Saudi Arabia": "Saudi Arabia",
"Saxony": "Kingdom of Saxony",
"Senegal": "Senegal",
"Serbia": "Serbia",
"Seychelles": "Seychelles",
"Sicily": "Kingdom of the Two Sicilies",
"Sierra Leone": "Sierra Leone",
"Singapore": "Singapore",
"Slovakia": "Slovakia",
"Slovenia": "Slovenia",
"Solomon Islands": "Solomon Islands",
"Somalia": "Somalia",
"Somaliland": "Somaliland",
"South Africa": "South Africa",
"South Sudan": "South Sudan",
"Spain": "Spain",
"Sri Lanka": "Sri Lanka",
"St. Kitts and Nevis": "Saint Kitts and Nevis",
"St. Lucia": "Saint Lucia",
"St. Vincent and the Grenadines": "Saint Vincent and the Grenadines",
"Sudan": "Sudan",
"Suriname": "Suriname",
"Swaziland": "Eswatini",
"Sweden": "Sweden",
"Switzerland": "Switzerland",
"Syria": "Syria",
"Taiwan": "Taiwan",
"Tajikistan": "Tajikistan",
"Tanzania": "Tanzania",
"Thailand": "Thailand",
"Togo": "Togo",
"Tonga": "Tonga",
"Trinidad and Tobago": "Trinidad and Tobago",
"Tunisia": "Tunisia",
"Turkey": "Turkey",
"Turkmenistan": "Turkmenistan",
"Tuscany": "Grand Duchy of Tuscany",
"Tuvalu": "Tuvalu",
"USSR": "USSR",
"Uganda": "Uganda",
"Ukraine": "Ukraine",
"United Arab Emirates": "United Arab Emirates",
"United Kingdom": "United Kingdom",
"United States": "United States",
"Uruguay": "Uruguay",
"Uzbekistan": "Uzbekistan",
"Vanuatu": "Vanuatu",
"Venezuela": "Venezuela",
"Vietnam": "Vietnam",
"Wuerttemberg": "Kingdom of Wurttemberg",
"Yemen": "Yemen",
"Yugoslavia": "Yugoslavia",
"Zambia": "Zambia",
"Zanzibar": "Zanzibar",
"Zimbabwe": "Zimbabwe",
"Congo Brazzaville": "Congo",
"Congo, Democratic Republic": "Democratic Republic of Congo",
"Gran Colombia": "Great Colombia",
"Korea": "Korea (former)",
"Korea, North": "North Korea",
"Korea, South": "South Korea",
"Mecklenburg-Schwerin": "Mecklenburg Schwerin",
"Palestine/British Mandate": "Palestine",
"Papal states, the": "Vatican",
"Sahrawi": "Western Sahara",
"Serbia-Montenegro": "Serbia and Montenegro",
"Vietnam, North": "Democratic Republic of Vietnam",
"Vietnam, South": "Republic of Vietnam",
"Yemen, North": "Yemen Arab Republic",
"Yemen, South": "Yemen People's Republic"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# List specifying how to impute values for specific countries.
#
# As an example:
#
# - country: Panama
# country_impute: Colombia
# year_min: 1832
# year_max: 1902
#
# This means that we want to inherit the classifications for Panama from Colombia between 1832 and 1902.
#
# We note that `country` can also be a list of countries.
# USSR
- country:
- Russia
- Ukraine
- Belarus
- Azerbaijan
- Armenia
- Georgia
country_impute: USSR
year_min: 1922
year_max: 1990
- country:
- Turkmenistan
- Uzbekistan
- Kazakhstan
- Tajikistan
- Kyrgyzstan
country_impute: USSR
year_min: 1922
year_max: 1989

- country:
- Lithuania
- Latvia
- Estonia
- Moldova
country_impute: USSR
year_min: 1940
year_max: 1990

# Russian Empire
- country:
- Belarus
- Georgia
- Turkmenistan
- Kazakhstan
country_impute: Russia
year_min: 1800
year_max: 1921
- country:
- Tajikistan
- Kyrgyzstan
country_impute: Russia
year_min: 1868
year_max: 1921
- country: Uzbekistan
country_impute: Russia
year_min: 1865
year_max: 1911
- country: Moldova
country_impute: Russia
year_min: 1800
year_max: 1919
- country: Azerbaijan
country_impute: Russia
year_min: 1813
year_max: 1921

# Czechoslovakia
- country:
- Czechia
- Slovakia
country_impute: Czechoslovakia
year_min: 1918
year_max: 1992

# Yugoslavia
- country:
- Croatia
- Bosnia and Herzegovina
- Slovenia
- North Macedonia
country_impute: Yugoslavia
year_min: 1918
year_max: 1989
- country:
- Serbia
- Montenegro
- Kosovo
country_impute: Yugoslavia
year_min: 1918
year_max: 1991

# Serbia and Montenegro
- country:
- Serbia
- Montenegro
- Kosovo
country_impute: Serbia and Montenegro
year_min: 1992
year_max: 2005

# Kosovo
- country: Kosovo
country_impute: Serbia
year_min: 2006
year_max: 2007
Loading

0 comments on commit 4784218

Please sign in to comment.