Skip to content

Commit

Permalink
Merge pull request #11 from EcoExtreML/add_utils_csv_to_nc
Browse files Browse the repository at this point in the history
Add utils functions to read csv files and save output in nc
  • Loading branch information
SarahAlidoost authored Feb 1, 2022
2 parents 9ed61ea + ff476ea commit ac1fbb8
Show file tree
Hide file tree
Showing 4 changed files with 241 additions and 0 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,11 @@ Integrated code of SCOPE and STEMMUS
(3) Run STEMMUS_SCOPE v1.0.0 on a different compute node:

Open the file "filesread.m" and set all paths at the top of this file. The rest of the workflow is the same as explained above.

(4) Converting `.csv` files to NetCDF files:

There are some files in the `utils` directory of this repository. These utilities
read `.csv` files and save them in `.nc` (NetCDF) format.

> An example NetCDF file is stored in the project directory to show the desired
structure of variables in one file.
26 changes: 26 additions & 0 deletions utils/csv_to_nc/Variables_will_be_in_NetCDF_file.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
,pri_cmip,short_name_alma,short_name_cmip,standard_name,long_name,definition,unit,direction,dimension,grp_alma,grp_cmip,subgrid,Available in STEMMUS-SCOPE,File name,Variable name in STEMMUS-SCOPE,,
1,1,SWnet,rss,surface_net_downward_shortwave_flux,Net shortwave radiation,"Incoming solar radiation less the simulated outgoing shortwave radiation, averaged over a grid cell",W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Netshort,,
1,1,LWnet,rls,surface_net_downward_longwave_flux,Net longwave radiation,"Incident longwave radiation less the simulated outgoing longwave radiation, averaged over a grid cell",W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Netlong,,
,2,SWdown,rsds,surface_downwelling_shortwave_flux_in_air,Downward short-wave radiation,,W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Rin,,
,2,LWdown,rlds,surface_downwelling_longwave_flux_in_air,Downward long-wave radiation,,W/m2,Downward,XYT,,LEday,,Yes,radiation.csv,Rli,,
,2,SWup,rsus,surface_upwelling_shortwave_flux_in_air,Upward short-wave radiation,,W/m2,Upward,XYT,,LEday,,Yes,radiation.csv,HemisOutShort,,
2,2,LWup,rlus,surface_upwelling_longwave_flux_in_air,Upward long-wave radiation,This upward longwave flux is to be compared to an ISCCP derived product.,W/m2,Upward,XYT,,LEday,,Yes,radiation.csv,HemisOutLong,,
1,1,Qle,hfls,surface_upward_latent_heat_flux,Latent heat flux,"Energy of evaporation, averaged over a grid cell",W/m2,Upward,XYT,,LEday,,Yes,fluxes.csv,lEtot,,
1,1,Qh,hfss,surface_upward_sensible_heat_flux,Sensible heat flux,"Sensible energy, averaged over a grid cell",W/m2,Upward,XYT,,LEday,,Yes,fluxes.csv,Htot,,
1,1,Qg,hfds,surface_downward_heat_flux,Ground heat flux,"Heat flux into the ground, averaged over a grid cell",W/m2,Downward,XYT,,LEday,,Yes,fluxes.csv,Gtot,,
1,2,VegT,tcs,surface_canopy_skin_temperature,Vegetation Canopy Temperature,"Vegetation temperature, averaged over all vegetation types",K,-,XYT,,LEday,veg.,Yes,surftemp.csv,Tcave,,
1,2,BaresoilT,tgs,surface_ground_skin_temperature,Temperature of bare soil,Surface bare soil temperature,K,-,XYT,,LEday,baresoil,Yes,surftemp.csv,Tsave,,
2,1,SoilTemp,tsl,soil_temperature,Average layer soil temperature,Average soil temperature in each user-defined soil layer (3D variable),K,-,XYZT,,LEday,,Yes,Sim_Temp.csv,,"If soil layer thicknesses vary from one location to another, interpolate to a standard set of depths. Ideally, the interpolation should preserve the vertical integral.",
1,1,SoilMoist,mrlsl,moisture_content_of_soil_layer,Average layer soil moisture,"Soil water content in each user-defined soil layer (3D variable). Includes the liquid, vapor and solid phases of water in the soil.",kg/m2,-,XYZT,,LWday,,Yes,Sim_Theta.csv,,,
,2,AResist_rac,ares,aerodynamic_resistance,Aerodynamic resistance,,s/m,-,XYT,,LWday,,Yes,aerodyn.csv,rac,,
,2,AResist_ras,ares,aerodynamic_resistance,Aerodynamic resistance,,s/m,-,XYT,,LWday,,Yes,aerodyn.csv,ras,,
,1,RH,hur,relative_humidity,Relative humidity,,%,-,XYT,,LWday,,Yes,ECdata.csv,RH,,
1,1,GPP,gpp,gross_primary_productivity_of_carbon,Gross Primary Production,Carbon Mass Flux out of Atmosphere due to Gross Primary Production on Land,kg/m2/s,Downward,XYT,,LCmon,,Yes,fluxes.csv,Actot,,
1,1,SWdown_ec,rsds,surface_downwelling_shortwave_flux_in_air,Downward short-wave radiation,,W/m2,Downward,XYT,,L3hr,,,ECdata.csv,Rin,,
1,1,LWdown_ec,rlds,surface_downwelling_longwave_flux_in_air,Downward long-wave radiation,,W/m2,Downward,XYT,,L3hr,,,ECdata.csv,Rli,,
1,1,Qair,huss,specific_humidity,Near surface specific humidity,,kg/kg,-,XYT,,L3hr,,,ECdata.csv,Qair,,
1,1,Tair,ta,air_temperature,Near surface air temperature,,K,-,XYT,,L3hr,,,ECdata.csv,Ta,,
1,1,Psurf,ps,surface_air_pressure,Surface Pressure,,Pa,-,XYT,,L3hr,,,ECdata.csv,p,,
2,1,Wind,ws,wind_speed,Near surface wind speed,,m/s,-,XYT,,L3hr,,,ECdata.csv,u,,
,,Precip,pr,precipitation_flux,Precipitation rate,,kg/m2/s,Downward,XYT,,L3hr,,,ECdata.csv,Pre,,
,,CO2air,co2c,mole_fraction_of_carbon_dioxide_in_air,Near surface CO2 concentration,,-,-,XYT,,L3hr,,,ECdata.csv,CO2air,,
27 changes: 27 additions & 0 deletions utils/csv_to_nc/read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from netCDF4 import Dataset
import os

# Inspection script: prints the structure of the example NetCDF file and, if
# it exists, of the generated 'output.nc', and grabs a few variables from each.

# Directory that holds the NetCDF files; replace the placeholder before running.
workdir = r'path_to_output_dir'

# os.path.join is used instead of a hard-coded '\\' so the script also works
# on non-Windows systems.
filename = os.path.join(workdir, 'AU-Tum_2002-2017_OzFlux_Met.nc')
print(filename, 'contains the following:')
# NOTE: the datasets are intentionally left open; the variable objects taken
# from them below are only usable while their dataset is open.
nc_fid = Dataset(filename, mode='r')
print(nc_fid)
x = nc_fid.variables['x']
y = nc_fid.variables['y']
time = nc_fid.variables['time']

# The generated file is optional: only inspect it when it is present.
filename = os.path.join(workdir, 'output.nc')
if os.path.exists(filename):
    print()
    print(filename, 'contains the following:')
    nc_fid2 = Dataset(filename, mode='r')
    print(nc_fid2)
    x2 = nc_fid2.variables['x']
    y2 = nc_fid2.variables['y']
    z2 = nc_fid2.variables['z']
    time2 = nc_fid2.variables['time']
    temp2 = nc_fid2.variables['SoilTemp']
    moist2 = nc_fid2.variables['SoilMoist']


180 changes: 180 additions & 0 deletions utils/csv_to_nc/write.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
from netCDF4 import Dataset

def split(s):
    """Split one CSV line into its fields, honouring double-quoted fields.

    Commas inside a double-quoted field do not separate fields, and the
    surrounding quotes are removed from the returned value. Quoted fields may
    appear anywhere in the line (start, middle, end) and any number of times.

    Args:
        s: a single line of CSV text without its trailing newline.

    Returns:
        The list of field values as strings. An empty input yields ``['']``,
        the same as ``''.split(',')``.
    """
    import csv  # local import: this file only imports netCDF4 at module level

    if s == '':
        # csv.reader produces an empty row for an empty line; callers index
        # into the result, so keep returning one empty field instead.
        return ['']
    return next(csv.reader([s]))

def readcsv(filename, nrHeaderLines):
    """Read a CSV file into a dictionary of columns.

    The first line supplies the column names. Columns whose name is empty are
    ignored. When ``nrHeaderLines`` is greater than 1, the second line is
    skipped as well. Double-quoted fields may contain commas.

    Args:
        filename: path of the CSV file to read.
        nrHeaderLines: number of header lines in the file (1 or 2).

    Returns:
        A dict mapping each non-empty column name to the list of its values
        (as strings), one entry per data line. If the file has no data lines
        the dict is empty.

    Raises:
        IndexError: if a non-blank data line has fewer fields than the header
            has named columns.
    """
    import csv  # local import: this file only imports netCDF4 at module level

    data = {}
    with open(filename) as f:
        # Lines are stripped before parsing, so surrounding whitespace and the
        # line ending never become part of the first or last field.
        rows = csv.reader(line.strip() for line in f)
        header = next(rows, [])
        if nrHeaderLines > 1:  # it is either 1 or 2
            next(rows, None)
        for row in rows:
            if not row:
                # Blank line (e.g. at the end of the file): nothing to store.
                continue
            for i, name in enumerate(header):
                if name != '':
                    data.setdefault(name, []).append(row[i])
    return data

def read_depths(filename):
    """Read the list of depths from the first line of a file.

    The first line is expected to hold whitespace-separated numbers, optionally
    preceded by '#' and followed by trailing commas.

    Args:
        filename: path of the file whose first line holds the depths.

    Returns:
        The depths as a list of floats (empty if the line holds no numbers).

    Raises:
        ValueError: if one of the whitespace-separated tokens is not a number.
    """
    with open(filename) as f:
        first_line = f.readline()
    # The first line has ,,,,,, at the end, and may start with '#'.
    tokens = first_line.strip().strip('#').strip(',').split()
    return [float(token) for token in tokens]

def read2d_transposed_unit(filename, nrHeaderLines, unit, depths):
    """Read a comma-separated table of numbers and convert it to ``unit``.

    The header line(s) are skipped; every remaining non-blank line becomes one
    row of floats in the result.

    Args:
        filename: path of the CSV file to read.
        nrHeaderLines: number of header lines to skip (1 or 2).
        unit: target unit. 'K' adds 273.15 to every value (Celsius to Kelvin);
            'kg/m2' scales every value with the matching entry of ``depths``;
            any other unit leaves the values unchanged.
        depths: one number per column, only used when ``unit`` is 'kg/m2'.

    Returns:
        A list of rows, each row a list of floats.

    Raises:
        ValueError: if a field on a data line is not a number.
    """
    data = []
    with open(filename) as f:
        f.readline()  # skip the headerline(s)
        if nrHeaderLines > 1:  # it is either 1 or 2
            f.readline()
        for raw_line in f:
            raw_line = raw_line.strip()
            if not raw_line:
                # Blank line (e.g. at the end of the file): nothing to convert.
                continue
            # Convert to float as the values may need to be scaled.
            values = [float(field) for field in raw_line.split(',')]
            if unit == 'K':
                # Celsius to Kelvin : K = 273.15 + C
                values = [273.15 + c for c in values]
            elif unit == 'kg/m2':
                # Yijian Zeng: m3/m3 to kg/m2: SM = VolumetricWaterContent * Density * Thickness
                # VolumetricWaterContent: provided (m3/m3)
                # Density: constant (water_density = 1000 kg per m3)
                # Thickness (m): compute from depth (cm)
                # NOTE(review): the value from `depths` is used directly as the
                # layer thickness - confirm that `depths` holds layer
                # thicknesses and not cumulative depths.
                # zip() stops at the shorter of the two sequences.
                values = [(1000.0 * vwc * depth / 100.0) for vwc, depth in zip(values, depths)]
            data.append(values)
    return data

def generateNetCdf(lat, lon, outputfile, workdir):
    """Convert the csv files in ``workdir`` into one NetCDF file.

    The variable metadata csv lists, per output variable, its name, unit,
    long name, definition, dimensions, the csv file that holds its data and
    the column name in that file. Every listed variable is created in the
    NetCDF file and filled from the corresponding csv file.

    Args:
        lat: latitude; stored as a global attribute and as the value of 'y'.
        lon: longitude; stored as a global attribute and as the value of 'x'.
        outputfile: name of the NetCDF file to create inside ``workdir``.
        workdir: directory that holds all input csv files and receives the
            output file.

    Raises:
        ValueError: if a variable has a dimension other than 'XYT' or 'XYZT',
            or if no data was read from which the time axis can be sized.
    """
    import os  # local import: this file only imports netCDF4 at module level

    # location and filenames (os.path.join instead of a hard-coded '\\', so
    # this also works on non-Windows systems):

    filename_out = os.path.join(workdir, outputfile)
    sim_temp = os.path.join(workdir, 'Sim_Temp.csv')

    # The variable metadata file is Sheet 2 from the Excel file, stored as csv,
    # with the following changes, to make it work:
    #   - Renamed radiation.dat to radiation.csv
    #   - Renamed LEtot to lEtot
    #   - Split AResist into AResist_rac and AResist_ras
    #   - Renamed the 2nd occurrence of SWdown and LWdown to SWdown_ec and LWdown_ec
    # Note that the values in this file determine the metadata that the variables will receive
    variables_filename = os.path.join(workdir, 'Variables will be in NetCDF file.csv')
    if not os.path.exists(variables_filename):
        # The repository ships this file with underscores instead of spaces;
        # accept that name too.
        underscored = os.path.join(workdir, 'Variables_will_be_in_NetCDF_file.csv')
        if os.path.exists(underscored):
            variables_filename = underscored

    # specify additional metadata here:

    additional_metadata = {
        'tower_height': '80 m',
        'license_type': 'CC BY 4.0',
        'license_url': 'https://creativecommons.org/licenses/by/4.0/',
        'latitude': lat,
        'longitude': lon
    }

    # Our CSV reader can't guess the number of header-lines, so this is hardcoded here:

    headerlines = {'aerodyn.csv': 2, 'ECdata.csv': 1, 'fluxes.csv': 2, 'radiation.csv': 2, 'Sim_Temp.csv': 2, 'Sim_Theta.csv': 2, 'surftemp.csv': 2}

    print('Reading variable metadata from', "'" + variables_filename + "'")
    variables = readcsv(variables_filename, 1)
    depths = read_depths(sim_temp)

    # Create a new empty netCDF file, in NETCDF3_CLASSIC format, just like the example file AU-Tum_2002-2017_OzFlux_Met.nc
    # The context manager closes the file even when an error occurs halfway.

    print('Creating', "'" + filename_out + "'")
    with Dataset(filename_out, mode='w', format='NETCDF3_CLASSIC') as nc:

        # Create the dimensions, as required by netCDF

        nc.createDimension('x', size=1)
        nc.createDimension('y', size=1)
        nc.createDimension('z', size=len(depths))
        nc.createDimension('time', None)
        nc.createDimension('nchar', size=200)  # this is not used, however the example file has it

        # Create the variables, as required by netCDF

        var_x = nc.createVariable('x', 'float64', ('x',))
        var_y = nc.createVariable('y', 'float64', ('y',))
        var_z = nc.createVariable('z', 'float64', ('z',))
        var_t = nc.createVariable('time', 'float64', ('time',))

        # Add the generic metadata (taken from additional_metadata above)

        for name, value in additional_metadata.items():
            nc.setncattr(name, value)

        # Fill the x, y, z variables with values

        var_x[:] = lon  # in the example AU-Tum_2002-2017_OzFlux_Met.nc this is 1
        var_y[:] = lat  # in the example AU-Tum_2002-2017_OzFlux_Met.nc this is 1
        var_z.setncattr('units', 'depth in cm')
        var_z[:] = depths
        var_t.setncattr('units', 'seconds since 2002-01-01 00:00:00')  # Note: I do not read this from the files; if it changes, edit this
        var_t.setncattr('calendar', 'standard')
        # The data of var_t is inserted at the end; when we "know" the length
        var_t_length = None
        # Number of rows of the last depth-profile table read; only used to
        # size the time axis when no single-column variable was read.
        profile_length = None

        # Add all parameters as a netCDF variable; also add the known metadata (units, long_name, Stemmus_name, definition)

        data = {}  # cache: csv file name -> columns, so each file is read once

        for i, variable in enumerate(variables['short_name_alma']):
            file = variables['File name'][i]
            stemmusname = variables['Variable name in STEMMUS-SCOPE'][i]
            unit = variables['unit'][i]
            long_name = variables['long_name'][i]
            definition = variables['definition'][i]
            dimensions = variables['dimension'][i]
            if dimensions == 'XYT':
                var = nc.createVariable(variable, 'float32', ('time', 'y', 'x'))
            elif dimensions == 'XYZT':
                var = nc.createVariable(variable, 'float32', ('time', 'z', 'y', 'x'))
            else:
                raise ValueError("Unsupported dimension '%s' for variable '%s'" % (dimensions, variable))
            var.setncattr('units', unit)
            var.setncattr('long_name', long_name)
            if stemmusname != '':
                var.setncattr('Stemmus_name', stemmusname)
            if definition != '':
                var.setncattr('definition', definition)
            if stemmusname != '':
                if file not in data:
                    print('Reading data from file', "'" + file + "'")
                    data[file] = readcsv(os.path.join(workdir, file), headerlines[file])
                var[:] = data[file][stemmusname]
                if var_t_length is None:
                    var_t_length = len(data[file][stemmusname])
            else:  # Sim_Temp or Sim_Theta
                print('Reading data from file', "'" + file + "'")
                matrix = read2d_transposed_unit(os.path.join(workdir, file), headerlines[file], unit, depths)
                var[:] = matrix
                profile_length = len(matrix)

        # Finally fill var_t with the nr of seconds per timestep
        # Note: we don't take the numbers from the file, because Year + DoY is not as accurate (it becomes 3599.99, 7199.99 etc)
        if var_t_length is None:
            var_t_length = profile_length
        if var_t_length is None:
            raise ValueError('No variable data was read, so the length of the time axis is unknown')
        var_t[:] = [step * 3600 for step in range(var_t_length)]

    print('Done writing', "'" + filename_out + "'")

# Script configuration: edit these values for your site before running.
lat = -35.6566009521484
lon = 148.151702880859
workdir = r'path_to_output_dir'  # This is the working folder; place all related files here (aerodyn.csv, ECdata.csv, fluxes.csv, radiation.csv, Sim_Temp.csv, Sim_Theta.csv, surftemp.csv, Variables will be in NetCDF file.csv)
site_name = 'MySite'  # change as required
model_name = 'Stemmus'  # update/correct if needed
outputfile = site_name + '_2002-2017_' + model_name + '.nc'  # This is the output filename

# Only run the conversion when executed as a script, so that importing this
# module (e.g. to reuse the reader functions) does not start writing files.
if __name__ == '__main__':
    generateNetCdf(lat, lon, outputfile, workdir)

0 comments on commit ac1fbb8

Please sign in to comment.