Commit b5725df: Updating products and figures
mdugenne committed Apr 21, 2024 (1 parent: 7303d95)
Showing 3 changed files with 3 additions and 2 deletions.
ancillary/plankton_annotated_taxonomy.xlsx (binary file modified; content not shown)
scripts/5_compute_taxa_NBSS.py (3 changes: 2 additions & 1 deletion)
@@ -237,6 +237,7 @@ def report_product(biovolume_spectra,biomass_spectra,taxo_group='Taxon'):
'range_biomass_bin':np.concatenate(np.diff(np.resize(np.append(biomass_bins[0], np.append(np.repeat(biomass_bins[1:-1], repeats=2), biomass_bins[len(biomass_bins) - 1])), (len(biomass_bins) - 1, 2)), axis=1)), # in g
'biomass_mid':stats.gmean(np.resize( np.append(biomass_bins[0], np.append(np.repeat(biomass_bins[1:-1], repeats=2), biomass_bins[len(biomass_bins) - 1])), (len(biomass_bins) - 1, 2)), axis=1) #in g
})

+data_bins['biomassClasses']=pd.cut(biomass_bins, biomass_bins).categories.values.astype(str)
# Convert all size units to cubic micrometers and mass units to gram
df_allometry['C_Intercept']=df_allometry['C_Intercept']*(df_allometry.Size_unit.apply(lambda x: PQ(1,'cubic_micrometer').to(x).magnitude)).values/(df_allometry.Elemental_mass_unit.apply(lambda x: PQ(1,'gram').to(x).magnitude)).values
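For readers skimming the diff, the added line builds string labels for the biomass size classes. A minimal sketch of the pattern, assuming `biomass_bins` holds numeric bin edges in grams (the values below are hypothetical):

```python
import numpy as np
import pandas as pd

# Hypothetical biomass bin edges; the real edges come from the script's setup.
biomass_bins = np.array([1e-9, 1e-8, 1e-7, 1e-6])

# Cutting the edge values by themselves yields one Interval per bin;
# .categories exposes the IntervalIndex, cast to strings for labelling.
labels = pd.cut(biomass_bins, biomass_bins).categories.values.astype(str)
print(labels)  # e.g. ['(1e-09, 1e-08]' '(1e-08, 1e-07]' '(1e-07, 1e-06]']
```

The surrounding unit conversion relies on pint's quantity-to-magnitude trick; a sketch of one such factor, assuming `PQ` aliases `pint.UnitRegistry().Quantity` as in the script's setup:

```python
import pint

PQ = pint.UnitRegistry().Quantity  # assumed alias matching the script's PQ

# Factor converting 1 cubic micrometer into another recorded size unit:
factor = PQ(1, 'cubic_micrometer').to('cubic_millimeter').magnitude
print(factor)  # ~1e-09
```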
@@ -396,7 +397,7 @@ def report_product(biovolume_spectra,biomass_spectra,taxo_group='Taxon'):
NBSS_binned_all_thres_PFT_biomass = NBSS_binned_all_biomass_thres.dropna(subset=['NB']).groupby(['date_bin', 'Station_location', 'midLatBin', 'midLonBin', 'Min_obs_depth', 'Max_obs_depth','biomassClasses', 'biomass_mid', 'range_biomass_bin', 'PFT']).apply(lambda x: pd.Series({ 'Validation_percentage': np.round(np.nansum((x.Validation_percentage.astype(float) * x.ROI_number_sum.astype( float)) / x.ROI_number_sum.astype(float).sum()), 2),'Biomass_mean': np.nansum((x.Biomass_mean.astype(float) * x.ROI_number_sum) / np.nansum(x.ROI_number_sum)),'size_class_pixel': np.nanmean(x.size_class_pixel.astype(float)), 'ROI_number_sum': np.nansum(x.ROI_number_sum.astype( float)),'ROI_abundance_mean': np.nansum(( x.ROI_abundance_mean.astype(float) * x.ROI_number_sum.astype(float)) / np.nansum(x.ROI_number_sum.astype(float))),'NB': np.nansum(x.NB.astype( float)), 'count_uncertainty': poisson.pmf( k=np.nansum(x.ROI_number_sum.astype( float)), mu=np.nansum(x.ROI_number_sum.astype( float))),'size_uncertainty': np.nanmean( x.size_uncertainty.astype(float)), 'logNB': np.log10( np.nansum(x.NB.astype( float))), 'logSize': np.log10( x.biomass_mid.astype( float).unique()[0]/data_bins.biomass_mid[0])})).reset_index().sort_values( ['date_bin', 'Station_location', 'midLatBin', 'midLonBin', 'Min_obs_depth', 'Max_obs_depth', 'PFT', 'biomass_mid']).reset_index(drop=True)
NBSS_binned_all_PFT_biomass=NBSS_binned_all_thres_PFT_biomass.groupby(['date_bin', 'Station_location', 'midLatBin', 'midLonBin', 'Min_obs_depth', 'Max_obs_depth', 'PFT', 'Validation_percentage']).apply(lambda x: threshold_func(x,empty_bins = 1,threshold_count=1,threshold_size=1)).reset_index(drop=True).sort_values( ['date_bin', 'Station_location','midLatBin', 'midLonBin', 'Min_obs_depth', 'Max_obs_depth', 'PFT','biomass_mid']).reset_index(drop=True)
NBSS_binned_all_PFT_biomass['Total_biomass'] = NBSS_binned_all_PFT_biomass.groupby(['date_bin', 'Station_location', 'midLatBin', 'midLonBin', 'Min_obs_depth', 'Max_obs_depth', 'PFT', 'Validation_percentage'], dropna=False).apply(lambda x: pd.DataFrame({'Total_biomass': 1e+06 * np.nansum(x.NB * x.range_biomass_bin)}, index=list( x.index))).reset_index().Total_biomass.values # in microgram per liters or milligram per cubic meters
-lin_fit_PFT_biomass = NBSS_binned_all_PFT_biomass.groupby(['date_bin', 'Station_location', 'midLatBin', 'midLonBin', 'Min_obs_depth', 'Max_obs_depth', 'PFT', 'Validation_percentage']).apply(lambda x: linear_fit_func(x.drop(columns=['logSize']).assign(logSize=np.log10(x.biomass_mid / data_bins.biomass_mid[0])))).reset_index().drop(columns='level_' + str( len(['date_bin', 'Station_location', 'midLatBin', 'midLonBin', 'Min_obs_depth', 'Max_obs_depth', 'PFT'] + [ 'Validation_percentage']))).sort_values( ['PFT','date_bin', 'Station_location','midLatBin', 'midLonBin', 'Min_obs_depth', 'Max_obs_depth']).reset_index(drop=True)
+lin_fit_PFT_biomass = NBSS_binned_all_PFT_biomass.groupby(['date_bin', 'Station_location', 'midLatBin', 'midLonBin', 'Min_obs_depth', 'Max_obs_depth', 'PFT', 'Validation_percentage']).apply(lambda x: linear_fit_func(x.drop(columns=['logSize']).assign(logSize=np.log10(x.biomass_mid / data_bins.biomass_mid[0]),PSD=np.nan,logECD=np.nan))).reset_index().drop(columns='level_' + str( len(['date_bin', 'Station_location', 'midLatBin', 'midLonBin', 'Min_obs_depth', 'Max_obs_depth', 'PFT'] + [ 'Validation_percentage']))).sort_values( ['PFT','date_bin', 'Station_location','midLatBin', 'midLonBin', 'Min_obs_depth', 'Max_obs_depth']).reset_index(drop=True)
## Save subbin datafiles
NBSS_raw_biomass = NBSS_binned_all_biomass.filter(['date_bin', 'midLatBin', 'midLonBin','Taxon','PFT','Validation_percentage','biomass_mid','range_biomass_bin', 'NB', 'PSD','Min_obs_depth', 'Max_obs_depth','Total_biomass'], axis=1)
NBSS_raw_biomass_PFT = NBSS_binned_all_PFT_biomass.filter(['date_bin', 'midLatBin', 'midLonBin','PFT','Validation_percentage','biomass_mid','range_biomass_bin', 'NB', 'PSD','Min_obs_depth', 'Max_obs_depth','Total_biomass'], axis=1)
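The one-line change above attaches `PSD` and `logECD` placeholders before handing each group to `linear_fit_func`, presumably so the shared fit routine finds the columns it expects even in the biomass branch. A minimal sketch of the pattern, with a stand-in fit function (the real `linear_fit_func` lives in the project's helpers):

```python
import numpy as np
import pandas as pd

def linear_fit_func(df):
    # Stand-in: ordinary least-squares fit of logNB against logSize.
    slope, intercept = np.polyfit(df['logSize'], df['logNB'], deg=1)
    return pd.Series({'slope': slope, 'intercept': intercept})

# Hypothetical spectrum for a single station/date group.
x = pd.DataFrame({'biomass_mid': [1e-9, 1e-8, 1e-7], 'logNB': [3.2, 2.1, 1.0]})

# Same pattern as the commit: normalize logSize to the smallest bin and attach
# NaN placeholders for columns the shared routine may reference downstream.
fit = linear_fit_func(x.assign(logSize=np.log10(x.biomass_mid / 1e-9),
                               PSD=np.nan, logECD=np.nan))
print(fit)  # slope -1.1, intercept 3.2
```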
scripts/script_pssdb_firstrelease.py (2 changes: 1 addition & 1 deletion)
@@ -50,7 +50,7 @@

## Workflow starts here:

-path_to_datafile=(Path(cfg['git_dir']).expanduser()/ cfg['dataset_subdir']) / 'NBSS_data' / 'NBSS_ver_03_2024' / 'Raw'
+path_to_datafile=(Path(cfg['git_dir']).expanduser()/ cfg['dataset_subdir']) / 'NBSS_data' / 'NBSS_ver_04_2024' / 'Raw'
path_files= list(path_to_datafile.glob('*_1b_*.csv'))
path_files_1a=list(path_to_datafile.glob('*_1a_*.csv'))
df=pd.concat(map(lambda path: pd.read_table(path,sep=',').assign(Instrument=path.name[0:path.name.find('_')]),path_files)).drop_duplicates().reset_index(drop=True)
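The version bump points the release script at the `NBSS_ver_04_2024` products; the glob-and-concat lines then tag each row with its instrument, taken from the file name prefix. A minimal sketch of that prefix extraction, using a hypothetical file name following the products' naming convention:

```python
from pathlib import Path

# Hypothetical product file; the instrument name precedes the first underscore.
path = Path('IFCB_1b_Atlantic_2024.csv')
instrument = path.name[0:path.name.find('_')]
print(instrument)  # IFCB
```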