diff --git a/README.md b/README.md index b0c11e33..f466ad69 100644 --- a/README.md +++ b/README.md @@ -186,8 +186,10 @@ Dutch National supercomputer hosted at SURF. half-hour time step i.e. `365*24*2=17520`. To edit the config file, open the file with a text editor and change the - paths. The variable name e.g. `SoilPropertyPath` should not be changed. - Also, note a `/` is required at the end of each line. + paths. The `InputPath` and `OutputPath` are user-defined directories, make + sure they exist and you have right permissions. The variable name e.g. + `SoilPropertyPath` should not be changed. Also, note a `/` is required at + the end of each line. As explained above, the "InputPath" directory of the model is considered as the working/running directory and should include some data required by the @@ -296,8 +298,9 @@ See the [exe readme](./exe/README.md). ## Preparing the outputs of the model in NetCDF: -There are some files in `utils` directory in this repository. The utils are used -to read `.csv` files and save them in `.nc` format. +There is some files in utils directory in this repository. The utils are used to +read `.csv` files and save them in `.nc` format. See [utils +readme](./utils/csv_to_nc/README.md). > An example NetCDF file is stored in the project directory to show the desired structure of variables in one file. diff --git a/run_stemmus_scope_snellius.sh b/run_stemmus_scope_snellius.sh index 3d411e53..f24b3671 100644 --- a/run_stemmus_scope_snellius.sh +++ b/run_stemmus_scope_snellius.sh @@ -22,6 +22,10 @@ module load parallel/20210622-GCCcore-10.3.0 ### 2. To transfer environment vars, functions, ... source `which env_parallel.bash` +### python environment stemmus is needed to convert csv files to nc files +### see utils/csv_to_nc/README.md +. ~/mamba/bin/activate stemmus + ### 3. Create a function to loop over loop_func() { @@ -78,7 +82,14 @@ loop_func() { run_time=$(expr $end_time - $start_time) ## 3.9 Add some information to slurm*.out file later will be used. - echo "Run is COMPLETED. Model run time is $run_time s." >> $std_out + completed="COMPLETED" + echo "Run is $completed. Model run time is $run_time s." >> $std_out + + ## 3.10 Convert csv files to a nc file, if run is completed + if [[ -v completed ]]; + then + python utils/csv_to_nc/generate_netcdf_files.py --config_file $station_config --variable_file utils/csv_to_nc/Variables_will_be_in_NetCDF_file.csv + fi } ### 4. Create a log file for GNU parallel diff --git a/utils/csv_to_nc/README.md b/utils/csv_to_nc/README.md new file mode 100644 index 00000000..2fb64964 --- /dev/null +++ b/utils/csv_to_nc/README.md @@ -0,0 +1,56 @@ +# Converting `.csv` files to NetCDF files + +Currently, model outputs are several files in `csv` format. The model output +should be converted to one netcedf file according to Plumber protocol. To do so, +there is a file +[Variables_will_be_in_NetCDF_file.csv](./Variables_will_be_in_NetCDF_file.csv. +The file lists variables that should be in the netcdf file. Also, there is a +python script [csv_to_nc.py](./csv_to_nc.py) that contains main fucntions. Below +we explain how to use the python scripts. + +## 1. Create Conda environment + +> We need to do this step only once. + +We download and install Conda: + +```sh +wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-pypy3-Linux-x86_64.sh +bash Mambaforge-pypy3-Linux-x86_64.sh -b -p ~/mamba +``` + +Then, update base environment: + +```sh +. ~/mamba/bin/activate +mamba update --name base mamba +``` + +Finally, we create new conda environment called 'stemmus' with all required dependencies: + +```sh +cd STEMMUS_SCOPE/utils/csv_to_nc +mamba env create +``` + +## 2. Activate Conda environment + +> We need to do this step before running our python scripts. + +The environment can be activated with + +```sh +. ~/mamba/bin/activate stemmus +``` + +## 3. Run python script + +Open the configuration file [config_file_crib.txt][../../config_file_crib.txt] +or [config_file_snellius.txt][../../config_file_snellius.txt] and edit paths. Then, + +```sh +cd STEMMUS_SCOPE +python utils/csv_to_nc/generate_netcdf_files.py --config_file config_file_crib.txt --variable_file utils/csv_to_nc/Variables_will_be_in_NetCDF_file.csv +``` + +This will generate `ECdata.csv` and a netcdf file related to model output. \ No newline at end of file diff --git a/utils/csv_to_nc/Variables_will_be_in_NetCDF_file.csv b/utils/csv_to_nc/Variables_will_be_in_NetCDF_file.csv index 1c37848b..b26d3281 100644 --- a/utils/csv_to_nc/Variables_will_be_in_NetCDF_file.csv +++ b/utils/csv_to_nc/Variables_will_be_in_NetCDF_file.csv @@ -1,26 +1,27 @@ -,pri_cmip,short_name_alma,short_name_cmip,standard_name,long_name,definition,unit,direction,dimension,grp_alma,grp_cmip,subgrid,Available in STEMMUS-SCOPE,File name,Variable name in STEMMUS-SCOPE,, -1,1,SWnet,rss,surface_net_downward_shortwave_flux,Net shortwave radiation,"Incoming solar radiation less the simulated outgoing shortwave radiation, averaged over a grid cell",W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Netshort,, -1,1,LWnet,rls,surface_net_downward_longwave_flux,Net longwave radiation,"Incident longwave radiation less the simulated outgoing longwave radiation, averaged over a grid cell",W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Netlong,, -,2,SWdown,rsds,surface_downwelling_shortwave_flux_in_air,Downward short-wave radiation,,W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Rin,, -,2,LWdown,rlds,surface_downwelling_longwave_flux_in_air,Downward long-wave radiation,,W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Rli,, -,2,SWup,rsus,surface_upwelling_shortwave_flux_in_air,Upward short-wave radiation,,W/m2,Upward,XYT,,LEday,,Yes,radiation.csv,HemisOutShort,, -2,2,LWup,rlus,surface_upwelling_longwave_flux_in_air,Upward long-wave radiation,This upward longwave flux is to be compared to an ISCCP derived product.,W/m2,Upward,XYT,,LEday,,Yes,radiation.csv,HemisOutLong,, -1,1,Qle,hfls,surface_upward_latent_heat_flux,Latent heat flux,"Energy of evaporation, averaged over a grid cell",W/m2,Upward,XYT,,LEday,,Yes,fluxes.csv,lEtot,, -1,1,Qh,hfss,surface_upward_sensible_heat_flux,Sensible heat flux,"Sensible energy, averaged over a grid cell",W/m2,Upward,XYT,,LEday,,Yes,fluxes.csv,Htot,, -1,1,Qg,hfds,surface_downward_heat_flux,Ground heat flux,"Heat flux into the ground, averaged over a grid cell",W/m2,Downward,XYT,,LEday,,Yes,fluxes.csv,Gtot,, -1,2,VegT,tcs,surface_canopy_skin_temperature,Vegetation Canopy Temperature,"Vegetation temperature, averaged over all vegetation types",K,-,XYT,,LEday,veg.,Yes,surftemp.csv,Tcave,, -1,2,BaresoilT,tgs,surface_ground_skin_temperature,Temperature of bare soil,Surface bare soil temperature,K,-,XYT,,LEday,baresoil,Yes,surftemp.csv,Tsave,, -2,1,SoilTemp,tsl,soil_temperature,Average layer soil temperature,Average soil temperature in each user-defined soil layer (3D variable),K,-,XYZT,,LEday,,Yes,Sim_Temp.csv,,"If soil layer thicknesses vary from one location to another, interpolate to a standard set of depths. Ideally, the interpolation should preserve the vertical integral.", -1,1,SoilMoist,mrlsl,moisture_content_of_soil_layer,Average layer soil moisture,"Soil water content in each user-defined soil layer (3D variable). Includes the liquid, vapor and solid phases of water in the soil.",kg/m2,-,XYZT,,LWday,,Yes,Sim_Theta.csv,,, -,2,AResist_rac,ares,aerodynamic_resistance,Aerodynamic resistance,,s/m,-,XYT,,LWday,,Yes,aerodyn.csv,rac,, -,2,AResist_ras,ares,aerodynamic_resistance,Aerodynamic resistance,,s/m,-,XYT,,LWday,,Yes,aerodyn.csv,ras,, -,1,RH,hur,relative_humidity,Relative humidity,,%,-,XYT,,LWday,,Yes,ECdata.csv,RH,, -1,1,GPP,gpp,gross_primary_productivity_of_carbon,Gross Primary Production,Carbon Mass Flux out of Atmosphere due to Gross Primary Production on Land,Kg/m2/s,Downward,XYT,,LCmon,,Yes,fluxes.csv,Actot,, -1,1,SWdown_ec,rsds,surface_downwelling_shortwave_flux_in_air,Downward short-wave radiation,,W/m2,Downward,XYT,,L3hr,,,ECdata.csv,Rin,, -1,1,LWdown_ec,rlds,surface_downwelling_longwave_flux_in_air,Downward long-wave radiation,,W/m2,Downward,XYT,,L3hr,,,ECdata.csv,Rli,, -1,1,Qair,huss,specific_humidity,Near surface specific humidity,,kg/kg,-,XYT,,L3hr,,,ECdata.csv,Qair,, -1,1,Tair,ta,air_temperature,Near surface air temperature,,K,-,XYT,,L3hr,,,ECdata.csv,Ta,, -1,1,Psurf,ps,surface_air_pressure,Surface Pressure,,Pa,-,XYT,,L3hr,,,ECdata.csv,p,, -2,1,Wind,ws,wind_speed,Near surface wind speed,,m/s,-,XYT,,L3hr,,,ECdata.csv,u,, -,,Precip,pr,precipitation_flux,Precipitation rate,,kg/m2/s,Downward,XYT,,L3hr,,,ECdata.csv,Pre,, -,,CO2air,co2c,mole_fraction_of_carbon_dioxide_in_air,Near surface CO2 concentration,,-,-,XYT,,L3hr,,,ECdata.csv,CO2air,, +,pri_cmip,short_name_alma,short_name_cmip,standard_name,long_name,definition,unit,direction,dimension,grp_alma,grp_cmip,subgrid,Available in STEMMUS-SCOPE,File name,Variable name in STEMMUS-SCOPE, +1,1,SWnet,rss,surface_net_downward_shortwave_flux,Net shortwave radiation,"Incoming solar radiation less the simulated outgoing shortwave radiation, averaged over a grid cell",W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Netshort, +1,1,LWnet,rls,surface_net_downward_longwave_flux,Net longwave radiation,"Incident longwave radiation less the simulated outgoing longwave radiation, averaged over a grid cell",W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Netlong, +,2,SWdown,rsds,surface_downwelling_shortwave_flux_in_air,Downward short-wave radiation,,W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Rin, +,2,LWdown,rlds,surface_downwelling_longwave_flux_in_air,Downward long-wave radiation,,W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Rli, +,2,SWup,rsus,surface_upwelling_shortwave_flux_in_air,Upward short-wave radiation,,W/m2,Upward,XYT,,LEday,,Yes,radiation.csv,HemisOutShort, +2,2,LWup,rlus,surface_upwelling_longwave_flux_in_air,Upward long-wave radiation,This upward longwave flux is to be compared to an ISCCP derived product.,W/m2,Upward,XYT,,LEday,,Yes,radiation.csv,HemisOutLong, +1,1,Qle,hfls,surface_upward_latent_heat_flux,Latent heat flux,"Energy of evaporation, averaged over a grid cell",W/m2,Upward,XYT,,LEday,,Yes,fluxes.csv,lEtot, +1,1,Qh,hfss,surface_upward_sensible_heat_flux,Sensible heat flux,"Sensible energy, averaged over a grid cell",W/m2,Upward,XYT,,LEday,,Yes,fluxes.csv,Htot, +1,1,Qg,hfds,surface_downward_heat_flux,Ground heat flux,"Heat flux into the ground, averaged over a grid cell",W/m2,Downward,XYT,,LEday,,Yes,fluxes.csv,Gtot, +1,2,VegT,tcs,surface_canopy_skin_temperature,Vegetation Canopy Temperature,"Vegetation temperature, averaged over all vegetation types",K,-,XYT,,LEday,veg.,Yes,surftemp.csv,Tcave, +1,2,BaresoilT,tgs,surface_ground_skin_temperature,Temperature of bare soil,Surface bare soil temperature,K,-,XYT,,LEday,baresoil,Yes,surftemp.csv,Tsave, +2,1,SoilTemp,tsl,soil_temperature,Average layer soil temperature,Average soil temperature in each user-defined soil layer (3D variable),K,-,XYZT,,LEday,,Yes,Sim_Temp.csv,,"If soil layer thicknesses vary from one location to another, interpolate to a standard set of depths. Ideally, the interpolation should preserve the vertical integral." +1,1,SoilMoist,mrlsl,moisture_content_of_soil_layer,Average layer soil moisture,"Soil water content in each user-defined soil layer (3D variable). Includes the liquid, vapor and solid phases of water in the soil.",kg/m2,-,XYZT,,LWday,,Yes,Sim_Theta.csv,, +,2,AResist_rac,ares,aerodynamic_resistance,Aerodynamic resistance,,s/m,-,XYT,,LWday,,Yes,aerodyn.csv,rac, +,2,AResist_ras,ares,aerodynamic_resistance,Aerodynamic resistance,,s/m,-,XYT,,LWday,,Yes,aerodyn.csv,ras, +,1,RH,hur,relative_humidity,Relative humidity,,%,-,XYT,,LWday,,Yes,ECdata.csv,RH, +1,1,GPP,gpp,gross_primary_productivity_of_carbon,Gross Primary Production,Carbon Mass Flux out of Atmosphere due to Gross Primary Production on Land,kg/m2/s,Downward,XYT,,LCmon,,Yes,fluxes.csv,GPP, +1,1,SWdown_ec,rsds,surface_downwelling_shortwave_flux_in_air,Downward short-wave radiation,,W/m2,Downward,XYT,,L3hr,,,ECdata.csv,Rin, +1,1,LWdown_ec,rlds,surface_downwelling_longwave_flux_in_air,Downward long-wave radiation,,W/m2,Downward,XYT,,L3hr,,,ECdata.csv,Rli, +1,1,Qair,huss,specific_humidity,Near surface specific humidity,,kg/kg,-,XYT,,L3hr,,,ECdata.csv,Qair, +1,1,Tair,ta,air_temperature,Near surface air temperature,,K,-,XYT,,L3hr,,,ECdata.csv,Ta, +1,1,Psurf,ps,surface_air_pressure,Surface Pressure,,Pa,-,XYT,,L3hr,,,ECdata.csv,p, +2,1,Wind,ws,wind_speed,Near surface wind speed,,m/s,-,XYT,,L3hr,,,ECdata.csv,u, +,,Precip,pr,precipitation_flux,Precipitation rate,,kg/m2/s,Downward,XYT,,L3hr,,,ECdata.csv,Pre, +1,1,NEE,nep,surface_net_downward_mass_flux_of_carbon_dioxide_expressed_as_carbon_due_to_all_land_processes_excluding_anthropogenic_land_use_change,Net Ecosystem Exchange,Net Carbon Mass Flux out of Atmophere due to Net Ecosystem Productivity on Land.,kg/m2/s,Downward,XYT,,LCmon,,Yes,fluxes.csv,NEE, +1,1,Rnet,rss,surface_net_radiation_flux,Net radiation,"Net shortwave radiation less the simulated net longwave radiation, averaged over a grid cell",W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Rntot, diff --git a/utils/csv_to_nc/__init__.py b/utils/csv_to_nc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/utils/csv_to_nc/write.py b/utils/csv_to_nc/csv_to_nc.py similarity index 59% rename from utils/csv_to_nc/write.py rename to utils/csv_to_nc/csv_to_nc.py index 02520764..ecae356f 100644 --- a/utils/csv_to_nc/write.py +++ b/utils/csv_to_nc/csv_to_nc.py @@ -1,180 +1,239 @@ -from netCDF4 import Dataset - -def split(s): - t=s.split('"') - u=[t[i] for i in range(0, len(t), 2)] - v=[t[i] for i in range(1, len(t), 2)] - #u=[u[i][(i%2):(len(u[i])-(i+1)%2)] for i in range(0, len(u))] - u=[u[i][(i%2):(len(u[i])-(i+1)%2)] for i in range(0, len(u)-1)] + [u[i][(i%2):] for i in range(len(u)-1, len(u))] - w = [] - for i in range(len(u)): - w.extend(u[i].split(',')) - if i < len(v): - w.append(v[i]) - return w - -def readcsv(filename, nrHeaderLines): - f = open(filename) - header = f.readline() - header = header.strip().split(',') - if nrHeaderLines > 1: # it is either 1 or 2 - f.readline() - content = f.readlines() - data = {} - for line in content: - line = split(line.strip()) - for i in range(0, len(header)): - if header[i] != '': - if header[i] not in data: - data[header[i]] = [] - data[header[i]].append(line[i]) - return data - -def read_depths(filename): - f = open(filename) - depths = f.readline() - depths = depths.strip().strip('#').strip(',').split() # the first line has ,,,,,, at the end - depths = [float(depth) for depth in depths] - return depths - -def read2d_transposed_unit(filename, nrHeaderLines, unit, depths): - f = open(filename) - f.readline() # skip the headerline(s) - if nrHeaderLines > 1: # it is either 1 or 2 - f.readline() - content = f.readlines() - data = [] - for line in content: - line = line.strip().split(',') - line = [float(l) for l in line] # convert this to float as we may want to scale it - if unit == 'K': - # Celsius to Kelvin : K = 273.15 + C - line = [273.15 + c for c in line] - elif unit == 'kg/m2': - # Yijian Zeng: m3/m3 to kg/m2: SM = VolumetricWaterContent * Density * Thickness - # VolumetricWaterContent: provided (m3/m3) - # Density: constant (water_density = 1000 kg per m3) - # Thickness (m): compute from depth (cm) - line = [(1000.0 * vwc * depth / 100.0) for vwc,depth in zip(line, depths)] - data.append(line) - return data - -def generateNetCdf(lat, lon, outputfile, workdir): - # location and filenames: - - filename_out = workdir + '\\' + outputfile - variables_filename = workdir + '\\Variables will be in NetCDF file.csv' # This is Sheet 2 from the Excel file, stored as csv, with the following changes, to make it work: - sim_theta = workdir + '\\Sim_Theta.csv' - sim_temp = workdir + '\\Sim_Temp.csv' - - # Renamed radiation.dat to radiation.csv - # Renamed LEtot to lEtot - # Split AResist into AResist_rac and AResist_ras - # Renamed the 2nd occurence of SWdown and LWdown to SWdown_ec and LWdown_ex - # Note that the values in this Excel sheet file determine the metadata that the variables will receive - - # specify additional metadata here: - - additional_metadata = { - 'tower_height': '80 m', - 'license_type': 'CC BY 4.0', - 'license_url': 'https://creativecommons.org/licenses/by/4.0/', - 'latitude': lat, - 'longitude': lon - } - - # Our CSV reader can't guess the number of header-lines, so this is hardcoded here: - - headerlines = {'aerodyn.csv': 2, 'ECdata.csv': 1, 'fluxes.csv': 2, 'radiation.csv': 2, 'Sim_Temp.csv': 2, 'Sim_Theta.csv': 2, 'surftemp.csv': 2} - - print('Reading variable metadata from', "'" + variables_filename + "'") - variables = readcsv(variables_filename, 1) - depths = read_depths(sim_temp) - - # Create a new empty netCDF file, in NETCDF3_CLASSIC format, just like the example file AU-Tum_2002-2017_OzFlux_Met.nc - - print('Creating', "'" + filename_out + "'") - nc = Dataset(filename_out, mode='w', format='NETCDF3_CLASSIC') - - # Create the dimensions, as required by netCDF - - nc.createDimension('x', size=1) - nc.createDimension('y', size=1) - nc.createDimension('z', size=len(depths)) - nc.createDimension('time', None) - nc.createDimension('nchar', size=200) # this is not used, however the example file has it - - # Create the variables, as required by netCDF - - var_x = nc.createVariable('x', 'float64', ('x')) - var_y = nc.createVariable('y', 'float64', ('y')) - var_z = nc.createVariable('z', 'float64', ('z')) - var_t = nc.createVariable('time', 'float64', ('time')) - - # Add the generic metadata (taken from additional_metadata above) - - for metadata in additional_metadata: - nc.setncattr(metadata, additional_metadata[metadata]) - - # Fill the x, y, time variables with values - - var_x[:] = lon # in the example AU-Tum_2002-2017_OzFlux_Met.nc this is 1 - var_y[:] = lat # in the example AU-Tum_2002-2017_OzFlux_Met.nc this is 1 - var_z.setncattr('units', 'depth in cm') - var_z[:] = depths - var_t.setncattr('units', 'seconds since 2002-01-01 00:00:00') # Note: I do not read this from the files; if it changes, edit this - var_t.setncattr('calendar', 'standard') - # The data of var_t is inserted at the end; when we "know" the length - var_t_length = None - - # Add all parameters as a netCDF variable; also add the known metadata (units, long_name, Stemmus_name, definition) - - data = {} - - for i in range(len(variables['short_name_alma'])): - variable = variables['short_name_alma'][i] - file = variables['File name'][i] - stemmusname = variables['Variable name in STEMMUS-SCOPE'][i] - unit = variables['unit'][i] - long_name = variables['long_name'][i] - definition = variables['definition'][i] - dimensions = variables['dimension'][i] - var = None - if dimensions == 'XYT': - var = nc.createVariable(variable, 'float32', ('time','y','x')) - elif dimensions == 'XYZT': - var = nc.createVariable(variable, 'float32', ('time','z','y','x')) - var.setncattr('units', unit) - var.setncattr('long_name', long_name) - if stemmusname != '': - var.setncattr('Stemmus_name', stemmusname) - if definition != '': - var.setncattr('definition', definition) - if stemmusname != '': - if file not in data: - print('Reading data from file', "'" + file + "'") - data[file] = readcsv(workdir + '\\' + file, headerlines[file]) - var[:] = data[file][stemmusname] - if var_t_length == None: - var_t_length = len(data[file][stemmusname]) - else: # Sim_Temp or Sim_Theta - print('Reading data from file', "'" + file + "'") - matrix = read2d_transposed_unit(workdir + '\\' + file, headerlines[file], unit, depths) - var[:] = matrix - - # Finally fill var_t with the nr of seconds per timestep - # Note: we don't take the numbers from the file, because Year + DoY is not as accurate (it becomes 3599.99, 7199.99 etc) - var_t[:] = [i*3600 for i in range(var_t_length)] - - nc.close() - - print('Done writing', "'" + filename_out + "'") - -# main() -lat = -35.6566009521484 -lon = 148.151702880859 -workdir = r'path_to_output_dir' # This is the working folder; place all related files here (aerodyn.csv, ECdata.csv, fluxes.csv, radiation.csv, Sim_Temp.csv, Sim_Theta.csv, surftemp.csv, Variables will be in NetCDF file.csv) -site_name = 'MySite' # change as required -model_name = 'Stemmus' # update/correct if needed -outputfile = site_name + '_2002-2017_' + model_name + '.nc' # This is the output filename -generateNetCdf(lat, lon, outputfile, workdir) +import pandas as pd + +from netCDF4 import Dataset, num2date +from pathlib import Path + +def generate_ec_data(forcing_path, forcing_name, output_path): + """ + Read forcing data and create EC data in csv format. + """ + + # read data + filename = Path(forcing_path, forcing_name) # care for windows/unix paths + nc_fid = Dataset(filename, mode='r') + + # Extract variables as numpy arrays + t = nc_fid.variables['time'][:].flatten()/3600/24 + + Ta = nc_fid.variables['Tair'][:].flatten()-273.15 + + Rin = nc_fid.variables['SWdown'][:].flatten() + + u = nc_fid.variables['Wind'][:].flatten() + + Rli = nc_fid.variables['LWdown'][:].flatten() + + p = nc_fid.variables['Psurf'][:].flatten()/100 + + RH = nc_fid.variables['RH'][:].flatten() + + LAI = nc_fid.variables['LAI'][:].flatten() + + nctime = nc_fid.variables['time'] + ncdate = num2date(nctime[:],units= nctime.units, calendar=nctime.calendar) + year = [date.year for date in ncdate] + + Pre = nc_fid.variables['Precip'][:].flatten()/10 + + CO2air = nc_fid.variables['CO2air'][:].flatten()*44/22.4 + + Qair = nc_fid.variables['Qair'][:].flatten() + + data = [t, Ta, Rin, u, Rli, p, RH, LAI, year, Pre, CO2air, Qair] + names = ['t', 'Ta', 'Rin', 'u', 'Rli', 'p', 'RH', 'LAI', 'year', 'Pre', 'CO2air', 'Qair'] + + # Write to csv + df = pd.DataFrame.from_dict(dict(zip(names, data))) + filename = Path(output_path, "ECdata.csv") + df.to_csv(filename, index=False) + + # Extra information + lat = float(nc_fid.variables['latitude'][:].flatten()[0]) + lon = float(nc_fid.variables['longitude'][:].flatten()[0]) + return filename, lat, lon, nctime + + +def split(s): + t=s.split('"') + u=[t[i] for i in range(0, len(t), 2)] + v=[t[i] for i in range(1, len(t), 2)] + #u=[u[i][(i%2):(len(u[i])-(i+1)%2)] for i in range(len(u))] + u=[ + u[i][(i%2):(len(u[i])-(i+1)%2)] for i in range(len(u)-1) + ] + [u[i][(i%2):] for i in range(len(u)-1, len(u))] + w = [] + for i in range(len(u)): + w.extend(u[i].split(',')) + if i < len(v): + w.append(v[i]) + return w + + +def readcsv(filename, nrHeaderLines): + f = open(filename) + header = f.readline() + header = header.strip().split(',') + if nrHeaderLines > 1: # it is either 1 or 2 + f.readline() + content = f.readlines() + data = {} + for line in content: + line = split(line.strip()) + for i in range(len(header)): + if header[i] != '': + if header[i] not in data: + data[header[i]] = [] + data[header[i]].append(line[i]) + return data + + +def read_depths(filename): + f = open(filename) + depths = f.readline() + depths = depths.strip().strip('#').strip(',').split() # the first line has ,,,,,, at the end + depths = [float(depth) for depth in depths] + return depths + + +def read2d_transposed_unit(filename, nrHeaderLines, unit, depths): + f = open(filename) + f.readline() # skip the headerline(s) + if nrHeaderLines > 1: # it is either 1 or 2 + f.readline() + content = f.readlines() + data = [] + for line in content: + line = line.strip().split(',') + line = [float(l) for l in line] # convert this to float as we may want to scale it + if unit == 'K': + # Celsius to Kelvin : K = 273.15 + C + line = [273.15 + c for c in line] + elif unit == 'kg/m2': + # Yijian Zeng: m3/m3 to kg/m2: SM = VolumetricWaterContent * Density * Thickness + # VolumetricWaterContent: provided (m3/m3) + # Density: constant (water_density = 1000 kg per m3) + # Thickness (m): compute from depth (cm) + line = [(1000.0 * vwc * depth / 100.0) for vwc,depth in zip(line, depths)] + data.append(line) + return data + + +def generateNetCdf(lat, lon, nctime, output_dir, csvfiles_path, variables_filename): + # Renamed radiation.dat to radiation.csv + # Renamed LEtot to lEtot + # Split AResist into AResist_rac and AResist_ras + # Renamed the 2nd occurence of SWdown and LWdown to SWdown_ec and LWdown_ex + # Note that the values in this Excel sheet file determine the metadata that the variables will receive + + # specify additional metadata here: + + additional_metadata = { + 'model': 'STEMMUS_SCOPE', + 'institution': 'University of Twente; Northwest A&F University', + 'contact': 'Zhongbo Su, z.su@utwente.nl; Yijian Zeng, y.zeng@utwente.nl; Yunfei Wang, y.wang-3@utwente.nl', + 'license_type': 'CC BY 4.0', + 'license_url': 'https://creativecommons.org/licenses/by/4.0/', + 'latitude': lat, + 'longitude': lon + } + + # Our CSV reader can't guess the number of header-lines, so this is hardcoded here: + sim_theta = Path(csvfiles_path, 'Sim_Theta.csv') + sim_temp = Path(csvfiles_path, 'Sim_Temp.csv') + + headerlines = {'aerodyn.csv': 2, 'ECdata.csv': 1, 'fluxes.csv': 2, 'radiation.csv': 2, 'Sim_Temp.csv': 2, 'Sim_Theta.csv': 2, 'surftemp.csv': 2} + + print(f'Reading variable metadata from {variables_filename}') + variables = readcsv(variables_filename, 1) + depths = read_depths(sim_temp) + + # Create a new empty netCDF file, in NETCDF3_CLASSIC format, just like the example file AU-Tum_2002-2017_OzFlux_Met.nc + filename = f"{Path(csvfiles_path).stem}_STEMMUS_SCOPE.nc" + filename_out = Path(output_dir, filename) + print(f"Creating {filename_out} ") + nc = Dataset(filename_out, mode='w', format='NETCDF3_CLASSIC') + + # Create the dimensions, as required by netCDF + + nc.createDimension('x', size=1) + nc.createDimension('y', size=1) + nc.createDimension('z', size=len(depths)) + nc.createDimension('time', None) + nc.createDimension('nchar', size=200) # this is not used, however the example file has it + + # Create the variables, as required by netCDF + + var_x = nc.createVariable('x', 'float64', ('x')) + var_y = nc.createVariable('y', 'float64', ('y')) + var_z = nc.createVariable('z', 'float64', ('z')) + var_t = nc.createVariable('time', 'float64', ('time')) + var_latitude = nc.createVariable('latitude', 'float32', ('y','x')) + var_latitude.setncattr('long_name', 'Gridbox latitude') + var_latitude.setncattr('standard_name', 'latitude') + var_latitude.setncattr('units', 'degrees') + var_latitude[:] = lat + var_longitude = nc.createVariable('longitude', 'float32', ('y','x')) + var_longitude.setncattr('long_name', 'Gridbox longitude') + var_longitude.setncattr('standard_name', 'longitude') + var_longitude.setncattr('units', 'degrees') + var_longitude[:] = lon + + # Add the generic metadata (taken from additional_metadata above) + + for metadata in additional_metadata: + nc.setncattr(metadata, additional_metadata[metadata]) + + # Fill the x, y, time variables with values + + var_x[:] = lon # in the example AU-Tum_2002-2017_OzFlux_Met.nc this is 1 + var_y[:] = lat # in the example AU-Tum_2002-2017_OzFlux_Met.nc this is 1 + var_z.setncattr('units', 'depth in cm') + var_z[:] = depths + var_t.setncattr('units', f'{nctime.units}') + var_t.setncattr('calendar', f'{nctime.calendar}') + # The data of var_t is inserted at the end; when we "know" the length + var_t_length = None + + # Add all parameters as a netCDF variable; also add the known metadata (units, long_name, Stemmus_name, definition) + + data = {} + + for i in range(len(variables['short_name_alma'])): + variable = variables['short_name_alma'][i] + file = variables['File name'][i] + stemmusname = variables['Variable name in STEMMUS-SCOPE'][i] + unit = variables['unit'][i] + long_name = variables['long_name'][i] + standard_name = variables['standard_name'][i] + definition = variables['definition'][i] + dimensions = variables['dimension'][i] + var = None + if dimensions == 'XYT': + var = nc.createVariable(variable, 'float32', ('time','y','x')) + elif dimensions == 'XYZT': + var = nc.createVariable(variable, 'float32', ('time','y','x','z')) + var.setncattr('units', unit) + var.setncattr('long_name', long_name) + var.setncattr('standard_name', standard_name) + if stemmusname != '': + var.setncattr('STEMMUS-SCOPE_name', stemmusname) + if definition != '': + var.setncattr('definition', definition) + if stemmusname != '': + if file not in data: + print(f'Reading data from file: {file}') + data[file] = readcsv(Path(csvfiles_path, file), headerlines[file]) + var[:] = data[file][stemmusname] + if var_t_length is None: + var_t_length = len(data[file][stemmusname]) + else: # Sim_Temp or Sim_Theta + print(f'Reading data from file: {file}') + matrix = read2d_transposed_unit(Path(csvfiles_path, file), headerlines[file], unit, depths) + var[:] = matrix + + # Finally fill var_t with the nr of seconds per timestep + # Note: we don't take the numbers from the file, because Year + DoY is not as accurate (it becomes 3599.99, 7199.99 etc) + var_t[:] = [i*1800 for i in range(var_t_length)] + + nc.close() + return filename_out \ No newline at end of file diff --git a/utils/csv_to_nc/environment.yml b/utils/csv_to_nc/environment.yml new file mode 100644 index 00000000..ec60be6a --- /dev/null +++ b/utils/csv_to_nc/environment.yml @@ -0,0 +1,8 @@ +--- +name: stemmus +channels: + - conda-forge +dependencies: + - python=3.9 + - netcdf4>=1.5.8,<2 + - pandas diff --git a/utils/csv_to_nc/generate_netcdf_files.py b/utils/csv_to_nc/generate_netcdf_files.py new file mode 100644 index 00000000..cd9eb605 --- /dev/null +++ b/utils/csv_to_nc/generate_netcdf_files.py @@ -0,0 +1,45 @@ +"""Script to generate netcdf file from model output csv files. + +python generate_netcdf_files.py --config_file config_file_crib.txt --variable_file Variables_will_be_in_NetCDF_file.csv + +""" +import argparse +import csv_to_nc + +from pathlib import Path + + +def cli(): + """Generate netcdf file from model output csv files.""" + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('--config_file', '-c', + default=Path(Path(__file__).parents[2],'config_file_crib.txt'), + help='Config file') + parser.add_argument('--variable_file', '-v', + default=Path(Path(__file__).parents[0],'Variables_will_be_in_NetCDF_file.csv'), + help='CSV file with variables that will be in NetCDF') + args = parser.parse_args() + + with open(args.config_file) as file: + paths = dict(line.strip().split('=') for line in file) + + output_path = paths['OutputPath'] + forcing_path = paths['ForcingPath'] + forcing_filename = paths['ForcingFileName'] + + # Find model output sub-directory linked to the forcing_filename + station_name = forcing_filename.split("_")[0] + model_output_dir = sorted(Path(output_path).glob(f"{station_name}*"))[-1] + + # Generate EC data from forcing data + ECdata_filename, lat, lon, nctime = csv_to_nc.generate_ec_data(forcing_path, forcing_filename, model_output_dir) + print(f"{ECdata_filename}") + + # # Generate netcdf file from model output csv files + netcdf_filename = csv_to_nc.generateNetCdf(lat, lon, nctime, model_output_dir, model_output_dir, args.variable_file) + print(f"Done writing {netcdf_filename}") + + +if __name__ == "__main__": + cli() + diff --git a/utils/csv_to_nc/read.py b/utils/csv_to_nc/read.py deleted file mode 100644 index 9087af9e..00000000 --- a/utils/csv_to_nc/read.py +++ /dev/null @@ -1,27 +0,0 @@ -from netCDF4 import Dataset -import os - -workdir = r'path_to_output_dir' - -filename = workdir + '\\AU-Tum_2002-2017_OzFlux_Met.nc' -print(filename, 'contains the following:') -nc_fid = Dataset(filename, mode='r') -print(nc_fid) -x=nc_fid.variables['x'] -y=nc_fid.variables['y'] -time=nc_fid.variables['time'] - -filename = workdir + '\\output.nc' -if os.path.exists(filename): - print() - print(filename, 'contains the following:') - nc_fid2 = Dataset(filename, mode='r') - print(nc_fid2) - x2=nc_fid2.variables['x'] - y2=nc_fid2.variables['y'] - z2=nc_fid2.variables['z'] - time2=nc_fid2.variables['time'] - temp2=nc_fid2.variables['SoilTemp'] - moist2=nc_fid2.variables['SoilMoist'] - -