Skip to content

Commit

Permalink
Tryout extract google mobility spatial data (#287)
Browse files Browse the repository at this point in the history
Added extraction of provincial google community mobility data to function `get_google_mobility_data` in `~/data/mobility.py`
  • Loading branch information
twallema authored Oct 11, 2021
1 parent e8d3605 commit 6a62947
Show file tree
Hide file tree
Showing 75 changed files with 36,395 additions and 144 deletions.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Binary file not shown.

This file was deleted.

This file was deleted.

Binary file not shown.

This file was deleted.

36,335 changes: 36,334 additions & 1 deletion data/raw/sciensano/COVID19BE_VACC.csv

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -288,8 +288,7 @@
'refusal' : [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]})

# time-dependent seasonality parameters in seasonality_function
params.update({'season_factor' : 1,
'amplitude' : 0.1,
params.update({'amplitude' : 0.1,
'peak_shift' : 0})


Expand All @@ -310,7 +309,9 @@
'place' : mobility_function,
'N_vacc' : vaccination_function,
'alpha' : VOC_function,
'season_factor' : seasonality_function})
'beta_R' : seasonality_function,
'beta_U': seasonality_function,
'beta_M': seasonality_function})



Expand Down Expand Up @@ -345,7 +346,7 @@
# PSO settings
processes = int(os.getenv('SLURM_CPUS_ON_NODE', mp.cpu_count()))
sys.stdout.flush()
multiplier = 10
multiplier = 2
maxiter = maxiter_PSO
popsize = multiplier*processes

Expand Down Expand Up @@ -428,104 +429,6 @@
print(f"Run time PSO: {day}d{hour}h{minute:02}m{second:02}s")
sys.stdout.flush()


# ------------------
# Setup MCMC sampler
# ------------------

# NOTE: actually, I don't think running the MCMC is necessary for job==R0?

# Define priors
log_prior_fcn = [prior_uniform, prior_uniform, prior_uniform]
log_prior_fcn_args = bounds[1:]
# Perturbate PSO estimate
pars = ['beta_R', 'beta_U', 'beta_M']
pert = [0.02, 0.02, 0.02]
ndim, nwalkers, pos = perturbate_PSO(theta[1:], pert, multiplier=processes, bounds=log_prior_fcn_args, verbose=False)

# Manually overwrite the number of walkers (12 times 14 is too much man!)
nwalkers = processes * 4

# Set up the sampler backend if needed
if backend:
filename = f'{spatial_unit}_backend_{run_date}'
backend = emcee.backends.HDFBackend(results_folder+filename)
backend.reset(nwalkers, ndim)

# Labels for traceplots
labels = ['$\\beta^R$', '$\\beta^U$', '$\\beta^M$']
# Arguments of chosen objective function
objective_fcn = objective_fcns.log_probability
objective_fcn_args = (model, log_prior_fcn, log_prior_fcn_args, data, states, pars)
objective_fcn_kwargs = {'weights':weights, 'draw_fcn':None, 'samples':{}, 'start_date':start_calibration, \
'warmup':warmup, 'dist':'poisson', 'poisson_offset':poisson_offset, 'agg':agg}


print('\n2) Markov-Chain Monte-Carlo sampling')
print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n')
print(f'Using {processes} cores for {ndim} parameters, in {nwalkers} chains.\n')
sys.stdout.flush()


# ----------------
# Run MCMC sampler
# ----------------

# Print autocorrelation and traceplot every print_n'th iteration
sampler = run_MCMC(pos, max_n, print_n, labels, objective_fcn, objective_fcn_args, \
objective_fcn_kwargs, backend, spatial_unit, run_date, job, progress=progress, agg=agg)


# ---------------
# Process results
# ---------------

thin = 1
try:
autocorr = sampler.get_autocorr_time()
thin = max(1,int(0.5 * np.min(autocorr)))
print(f'Convergence: the chain is longer than 50 times the intergrated autocorrelation time.\nPreparing to save samples with thinning value {thin}.')
sys.stdout.flush()
except:
print('Warning: The chain is shorter than 50 times the integrated autocorrelation time.\nUse this estimate with caution and run a longer chain! Saving all samples (thinning=1).\n')
sys.stdout.flush()

# Print runtime in hours
final_time = datetime.datetime.now()
runtime = (final_time - intermediate_time)
totalMinute, second = divmod(runtime.seconds, 60)
hour, minute = divmod(totalMinute, 60)
day = runtime.days
if day == 0:
print(f"Run time MCMC: {hour}h{minute:02}m{second:02}s")
else:
print(f"Run time MCMC: {day}d{hour}h{minute:02}m{second:02}s")
sys.stdout.flush()

print('\n3) Sending samples to dictionary')
sys.stdout.flush()

flat_samples = sampler.get_chain(discard=0,thin=thin,flat=True)
samples_dict = {}
for count,name in enumerate(pars):
samples_dict[name] = flat_samples[:,count].tolist()

samples_dict.update({
'warmup' : warmup,
'start_date_R0' : start_calibration,
'end_date_R0' : end_calibration,
'n_chains_R0': int(nwalkers)
})

json_file = f'{samples_path}{str(spatial_unit)}_{run_date}.json'
with open(json_file, 'w') as fp:
json.dump(samples_dict, fp)

print('DONE!')
print(f'SAMPLES DICTIONARY SAVED IN "{json_file}"')
print('-----------------------------------------------------------------------------------------------------------------------------------\n')
sys.stdout.flush()

# Work is done
sys.exit()

Expand All @@ -543,17 +446,17 @@

# Start of calibration
start_calibration = '2020-03-02'
# Last datapoint used to calibrate infectivity, compliance and effectivity
# Last datapoint used to calibrate infectivity, compliance and effectivity
if not args.enddate:
end_calibration = '2021-08-27'
end_calibration = df_sciensano.index.max().strftime("%m-%d-%Y")
else:
end_calibration = str(args.enddate)
# Spatial unit: depesnds on aggregation
spatial_unit = f'{agg}_full-pandemic_{job}_{signature}'

# PSO settings
processes = int(os.getenv('SLURM_CPUS_ON_NODE', mp.cpu_count()))
multiplier = 1 # 10
multiplier = 2 # 10
maxiter = maxiter_PSO
popsize = multiplier*processes

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
68 changes: 49 additions & 19 deletions src/covid19model/data/mobility.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def get_apple_mobility_data(update=True):

return df_apple

def get_google_mobility_data(update=True, plot=False, filename_plot=None):
def get_google_mobility_data(update=True, provincial=False, plot=False, filename_plot=None):
"""Download Google Community mobility report data
This function downloads, formats and returns the available Belgian Google Community mobility report data.
Expand All @@ -94,6 +94,8 @@ def get_google_mobility_data(update=True, plot=False, filename_plot=None):
update : boolean (default True)
True if you want to update the data,
False if you want to read only previously saved data
provincial : boolean (default False)
True to return the google mobility data per province (10 provinces + Brussels)
plot : boolean (default False)
If True, return a preformatted plot of the data
filename_viz: string
Expand All @@ -103,15 +105,20 @@ def get_google_mobility_data(update=True, plot=False, filename_plot=None):
Returns
-----------
data : pandas.DataFrame
DataFrame with the google mobility data on daily basis. The following columns
are returned:
DataFrame with the google mobility data on daily basis. The following columns are returned:
- retail_recreation : Mobility trends for places such as restaurants, cafés, shopping centres, theme parks, museums, libraries and cinemas.
- grocery : Mobility trends for places such as grocery shops, food warehouses, farmers markets, specialty food shops and pharmacies.
- parks: Mobility trends for places such as local parks, national parks, public beaches, marinas, dog parks, plazas and public gardens.
- transport: Mobility trends for places that are public transport hubs, such as underground, bus and train stations.
- work: Mobility trends for places of work.
- residential: Mobility trends for places of residence.
If provincial data is requested the resulting dataframe has a pandas multiindex containing two levels: 1) date and 2) region.
To access multi-indexed data, use the 'loc' method: df.loc[(index_val_1, index_val_2), column_name].
F.i. df[('2020-03-15', 'Brussels'), :] returns the mobility reductions on March 15th, 2020 in Brussels.
To access the entries corresponding to an entire index level (f.i. all dates), use the slice method.
F.i. df[(slice(None), 'Brussels'),:] returns the mobility reductions in Brussels on all possible dates
Notes
----------
Expand All @@ -137,6 +144,7 @@ def get_google_mobility_data(update=True, plot=False, filename_plot=None):
abs_dir = os.path.dirname(__file__)
dtypes = {'sub_region_1': str, 'sub_region_2': str}

# Update data if necessary
if update:
# download raw data
df = pd.read_csv(url, parse_dates=['date'], dtype=dtypes)
Expand All @@ -152,36 +160,58 @@ def get_google_mobility_data(update=True, plot=False, filename_plot=None):
'../../../data/raw/mobility/google/google_community_mobility_data_BE.csv'),
parse_dates=['date'], dtype=dtypes)

# Extract values
data=df[df['sub_region_1'].isnull().values]

# Assign data to output variables
# Map column names
variable_mapping = {
'retail_and_recreation_percent_change_from_baseline': 'retail_recreation',
'grocery_and_pharmacy_percent_change_from_baseline': 'grocery',
'parks_percent_change_from_baseline': 'parks',
'transit_stations_percent_change_from_baseline': 'transport',
'workplaces_percent_change_from_baseline': 'work',
'residential_percent_change_from_baseline': 'residential'
'retail_and_recreation_percent_change_from_baseline': 'retail_recreation',
'grocery_and_pharmacy_percent_change_from_baseline': 'grocery',
'parks_percent_change_from_baseline': 'parks',
'transit_stations_percent_change_from_baseline': 'transport',
'workplaces_percent_change_from_baseline': 'work',
'residential_percent_change_from_baseline': 'residential'
}
data = data.rename(columns=variable_mapping)
data = data.set_index("date")
data.index.freq = 'D'
data = data[list(variable_mapping.values())]
df = df.rename(columns=variable_mapping)

if provincial == True:
# Extract region names and append Brussels
regions = list(df['sub_region_2'].dropna().unique())
regions.insert(0,'Brussels')
# Build multiindex dataframe
iterables = [df['date'].unique(), regions]
index = pd.MultiIndex.from_product(iterables, names=["date", "region"])
new_df = pd.DataFrame(index=index, columns=list(variable_mapping.values()))
# Loop over regions
for region in regions:
if region == 'Brussels':
new_df.loc[(slice(None),region),:] = df.loc[(df['sub_region_1'] == 'Brussels')][list(variable_mapping.values())].values
else:
new_df.loc[(slice(None),region),:] = df[df['sub_region_2']==region][list(variable_mapping.values())].values
df = new_df
else:

# Extract national values
df=df[df['sub_region_1'].isnull().values]
df = df.set_index("date")
df.index.freq = 'D'
df = df[list(variable_mapping.values())]

if filename_plot and not plot:
print("Filename plot has no effect, plot is not activated. Set `plot=True` to create plot.")

if plot:
fig, ax = google_mobility(data)
if provincial:
print("\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
print("Plotting of provincial-level google community mobility data is not implemented (yet).")
print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")

fig, ax = google_mobility(df)

if filename_plot:
plt.savefig(filename_plot, dpi=600, bbox_inches='tight',
orientation='portrait', papertype='a4')
else:
plt.show()

return data
return df



Expand Down
6 changes: 3 additions & 3 deletions src/covid19model/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,7 @@ class COVID19_SEIRD_spatial_vacc(BaseModel):
# En verder moet je aan de start van je code Nc = real_Nc plaatsen

state_names = ['S', 'E', 'I', 'A', 'M', 'C', 'C_icurec', 'ICU', 'R', 'D', 'H_in', 'H_out', 'H_tot', 'R_C', 'R_ICU', 'S_v', 'E_v', 'I_v', 'A_v', 'M_v', 'C_v', 'C_icurec_v', 'ICU_v', 'R_v']
parameter_names = ['beta_R', 'beta_U', 'beta_M', 'alpha', 'K_inf1', 'K_inf2', 'K_hosp', 'sigma', 'omega', 'zeta', 'da', 'dm', 'dc_R', 'dc_D', 'dICU_R', 'dICU_D', 'dICUrec', 'dhospital', 'e_i', 'e_s', 'e_h', 'e_a', 'd_vacc', 'xi', 'season_factor']
parameter_names = ['beta_R', 'beta_U', 'beta_M', 'alpha', 'K_inf1', 'K_inf2', 'K_hosp', 'sigma', 'omega', 'zeta', 'da', 'dm', 'dc_R', 'dc_D', 'dICU_R', 'dICU_D', 'dICUrec', 'dhospital', 'e_i', 'e_s', 'e_h', 'e_a', 'd_vacc', 'xi']
parameters_stratified_names = [['area', 'sg', 'p'], ['s','a','h', 'c', 'm_C','m_ICU', 'N_vacc']]
stratification = ['place','Nc'] # mobility and social interaction: name of the dimension (better names: ['nis', 'age'])
coordinates = ['place'] # 'place' is interpreted as a list of NIS-codes appropriate to the geography
Expand All @@ -1030,7 +1030,7 @@ class COVID19_SEIRD_spatial_vacc(BaseModel):
@staticmethod

def integrate(t, S, E, I, A, M, C, C_icurec, ICU, R, D, H_in, H_out, H_tot, R_C, R_ICU, S_v, E_v, I_v, A_v, M_v, C_v, C_icurec_v, ICU_v, R_v, # time + SEIRD classes
beta_R, beta_U, beta_M, alpha, K_inf1, K_inf2, K_hosp, sigma, omega, zeta, da, dm, dc_R, dc_D, dICU_R, dICU_D, dICUrec, dhospital, e_i, e_s, e_h, e_a, d_vacc, xi, season_factor, # SEIRD parameters
beta_R, beta_U, beta_M, alpha, K_inf1, K_inf2, K_hosp, sigma, omega, zeta, da, dm, dc_R, dc_D, dICU_R, dICU_D, dICUrec, dhospital, e_i, e_s, e_h, e_a, d_vacc, xi, # SEIRD parameters
area, sg, p, # spatially stratified parameters. Might delete sg later.
s, a, h, c, m_C, m_ICU, N_vacc, # age-stratified parameters
place, Nc): # stratified parameters that determine stratification dimensions
Expand Down Expand Up @@ -1126,7 +1126,7 @@ def integrate(t, S, E, I, A, M, C, C_icurec, ICU, R, D, H_in, H_out, H_tot, R_C,

# Define spatially stratified infectivity beta with three degrees of freedom beta_R, beta_U, beta_M, based on stratification
# Default values for RU_threshold and UM_threshold are taken. beta[patch]
beta = season_factor*stratify_beta(beta_R, beta_U, beta_M, agg, area, T.sum(axis=1))
beta = stratify_beta(beta_R, beta_U, beta_M, agg, area, T.sum(axis=1))

# Define the number of contacts multiplier per patch and age, multip[patch,age]
# Note how part of the vaccinated people still contribute to infection pressure, but ...
Expand Down
2 changes: 1 addition & 1 deletion src/covid19model/visualization/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import matplotlib.dates as mdates

def google_mobility(data):
"""Create plot of google mobility data
"""Create plot of national-level google mobility data
Parameters
----------
Expand Down

0 comments on commit 6a62947

Please sign in to comment.