From 096e2432e140aeb5509e009c85ef0c3f0204d8a5 Mon Sep 17 00:00:00 2001 From: JonKing93 Date: Tue, 3 Nov 2020 13:58:31 -0700 Subject: [PATCH] Alpha release 3.1.0 1. Bug fix for relative paths 2. Added opendap functionality 3. Added ensemble hasnan and naming --- @dash/checkStrsInList.m | 4 +- @ensemble/ensemble.m | 18 +- @ensemble/hasnan.m | 46 ++++ @ensemble/rename.m | 18 ++ @ensemble/update.m | 4 +- @ensemble/useMembers.m | 11 +- @ensemble/useVariables.m | 11 +- @ensembleMetadata/ensembleMetadata.m | 23 +- @ensembleMetadata/rename.m | 16 ++ @gridfile/add.m | 30 ++- @gridfile/buildSourcesForFiles.m | 2 +- @gridfile/collectFullPaths.m | 7 +- @gridfile/convertSourceToPrimitives.m | 4 +- @gridfile/findFileSources.m | 2 +- @gridfile/info.m | 4 +- @gridfile/renameSources.m | 8 +- @gridfile/repeatedLoad.m | 2 +- @gridfile/sourceFilepath.m | 2 +- @stateVector/sequence.m | 3 +- @stateVectorVariable/sequence.m | 23 +- @stateVectorVariable/specifyMetadata.m | 2 +- dataSource.m | 311 ++++++++++++------------- matSource.m | 49 ++-- ncSource.m | 45 ++-- opendapSource.m | 72 ++++++ 25 files changed, 456 insertions(+), 261 deletions(-) create mode 100644 @ensemble/hasnan.m create mode 100644 @ensemble/rename.m create mode 100644 @ensembleMetadata/rename.m create mode 100644 opendapSource.m diff --git a/@dash/checkStrsInList.m b/@dash/checkStrsInList.m index 5f570b65..e5f20c77 100644 --- a/@dash/checkStrsInList.m +++ b/@dash/checkStrsInList.m @@ -27,9 +27,9 @@ % Informative error message badName = name; if numel(input)>1 - badName = sprintf('Element %.f in %s (%s)', bad, name, input(bad)); + badName = sprintf('Element %.f in %s', bad, name); end - error('%s is not a %s. Allowed values are %s.', badName, listName, dash.messageList(list)); + error('%s (%s) is not a %s. 
Allowed values are %s.', badName, input(bad), listName, dash.messageList(list)); end end \ No newline at end of file diff --git a/@ensemble/ensemble.m b/@ensemble/ensemble.m index 66e491e8..5d6471c6 100644 --- a/@ensemble/ensemble.m +++ b/@ensemble/ensemble.m @@ -15,8 +15,9 @@ properties (SetAccess = private) file; % The .ens file associated with the object + name; % The name of the ensemble object - hasnan; % Whether a variable has NaN in an ensemble member + has_nan; % Whether a variable has NaN in an ensemble member metadata; % Ensemble metadata object for the saved state vector ensemble stateVector; % The stateVector object used to build the ensemble @@ -26,7 +27,7 @@ % Constructor methods - function obj = ensemble(filename) + function obj = ensemble(filename, name) %% Creates a new ensemble object % % obj = ensemble(filename) @@ -37,12 +38,17 @@ % Returns an ensemble object for a .ens file with the specified % full file path. % + % obj = ensemble(filename, name) + % Provides an identifying name for the ensemble object. + % % ----- Inputs ----- % % filename: The name of a .ens file on the active path. A string. % % fullname: The full file path to a .ens file. A string. % + % name: An identifying name for the ensemble object. A string. + % % ----- Outputs ----- % % obj: An ensemble object for the specified .ens file. @@ -55,6 +61,12 @@ % Members and variables are unspecified. 
obj.members = []; obj.variables = []; + + % Update name (error checking via ensembleMetadata) + if exist('name','var') + obj.metadata = obj.metadata.rename(name); + obj.name = name; + end end end @@ -75,5 +87,7 @@ obj = useVariables(obj, variables); varNames = variableNames(obj); s = info(obj); + nanMembers = hasnan(obj, varNames); + obj = rename(obj, newName); end end \ No newline at end of file diff --git a/@ensemble/hasnan.m b/@ensemble/hasnan.m new file mode 100644 index 00000000..7ce2cd39 --- /dev/null +++ b/@ensemble/hasnan.m @@ -0,0 +1,46 @@ +function[nanMembers] = hasnan(obj, varNames) +%% Indicates whether ensemble members contain NaN elements for variables +% in a state vector ensemble. +% +% nanMembers = obj.hasnan +% Returns a row vector that indicates which ensemble members contain NaN +% elements. +% +% nanVars = obj.hasnan(varNames) +% Returns a logical matrix that indicates whether specified variables have +% NaN elements in ensemble members. +% +% nanVars = obj.hasnan([]) +% Indicates whether variables have NaN elements in ensemble members for +% each variable in a state vector. +% +% ----- Inputs ----- +% +% varNames: A list of variables in a state vector. A string vector or +% cellstring vector. +% +% ----- Outputs ----- +% +% nanMembers: A logical row vector with one element per ensemble member. True +% elements indicate that the ensemble member contains NaN elements. +% +% nanVars: A logical matrix with one column per ensemble member. Each row +% indicates whether a particular variable has NaN elements in each +% ensemble member. 
+ +% If no inputs, return summary for all variables +if ~exist('varNames','var') + nanMembers = any(obj.has_nan,1); + return; +end + +% Otherwise, return for specified variables +allVars = obj.metadata.variableNames; +if isempty(varNames) + v = 1:numel(allVars); +else + v = dash.checkStrsInList(varNames, allVars, 'varNames', 'variable in the state vector'); +end +nanMembers = obj.has_nan(v,:); + +end \ No newline at end of file diff --git a/@ensemble/rename.m b/@ensemble/rename.m new file mode 100644 index 00000000..6a91d9a0 --- /dev/null +++ b/@ensemble/rename.m @@ -0,0 +1,18 @@ +function[obj] = rename(obj, newName) +%% Renames an ensemble object +% +% obj = obj.rename(newName) +% +% ----- Inputs ----- +% +% newName: The new name for the ensemble object. A string. +% +% ----- Outputs ----- +% +% obj: The updated ensemble object + +% Update name (error checking via ensembleMetadata) +obj.metadata = obj.metadata.rename(newName); +obj.name = dash.assertStrFlag(newName, 'newName'); + +end \ No newline at end of file diff --git a/@ensemble/update.m b/@ensemble/update.m index 3543c6b9..dee9da49 100644 --- a/@ensemble/update.m +++ b/@ensemble/update.m @@ -13,6 +13,7 @@ % If no matfile object is provided, load data into a structure fields = ["hasnan","metadata","stateVector"]; +props = ["has_nan", "metadata", "stateVector"]; if ~exist('ens','var') || isempty(ens) ens = dash.loadMatfileFields(obj.file, fields, '.ens'); end @@ -20,7 +21,8 @@ % Fill in the fields for f = 1:numel(fields) name = char(fields(f)); - obj.(name) = ens.(name); + propName = char(props(f)); + obj.(propName) = ens.(name); end end \ No newline at end of file diff --git a/@ensemble/useMembers.m b/@ensemble/useMembers.m index de5d8ae9..df6569a9 100644 --- a/@ensemble/useMembers.m +++ b/@ensemble/useMembers.m @@ -19,8 +19,13 @@ % Update. Error check the ensemble members. 
Save obj = obj.update; -[~, nEns] = obj.metadata.sizes; -members = dash.checkIndices(members, 'members', nEns, 'the number of ensemble members'); -obj.members = members(:); +if ~exist('members','var') || isempty(members) + members = []; +else + [~, nEns] = obj.metadata.sizes; + members = dash.checkIndices(members, 'members', nEns, 'the number of ensemble members'); + members = members(:); +end +obj.members = members; end \ No newline at end of file diff --git a/@ensemble/useVariables.m b/@ensemble/useVariables.m index 1c57fdde..efafcfb3 100644 --- a/@ensemble/useVariables.m +++ b/@ensemble/useVariables.m @@ -18,8 +18,13 @@ % Check variables. Update. Save obj = obj.update; -obj.stateVector.checkVariables(varNames); -varNames = string(varNames); -obj.variables = unique(varNames(:)); +if ~exist('varNames','var') || isempty(varNames) + varNames = []; +else + obj.stateVector.checkVariables(varNames); + varNames = string(varNames); + varNames = unique(varNames(:)); +end +obj.variables = varNames; end \ No newline at end of file diff --git a/@ensembleMetadata/ensembleMetadata.m b/@ensembleMetadata/ensembleMetadata.m index 4ceaa04d..e4a58166 100644 --- a/@ensembleMetadata/ensembleMetadata.m +++ b/@ensembleMetadata/ensembleMetadata.m @@ -26,7 +26,7 @@ % Jonathan King, University of Arizona, 2019-2020 properties (SetAccess = private) - ensembleName; % Name of the ensemble + name; % Name of the ensemble vectorName; % Name of the state vector template variableNames; % Names of each metadata @@ -51,19 +51,24 @@ % Constructor methods - function obj = ensembleMetadata(sv) - %% Returns an ensembleMetadata object for a stateVector, ensemble, or .ens file. + function obj = ensembleMetadata(sv, name) + %% Returns an ensembleMetadata object for a stateVector. % % obj = ensembleMetadata(sv) % Creates an ensembleMetadata object for a state vector. % + % obj = ensembleMetadata(sv, name) + % Optionally gives an identifying name for the ensemble. 
+ % % ----- Inputs ----- % % sv: A stateVector object % + % name: An identifying name for the ensemble. A string + % % ----- Outputs ----- % - % meta: The ensemble metadata object + % obj: The ensemble metadata object % Error check. if ~isa(sv, 'stateVector') || ~isscalar(sv) @@ -71,7 +76,11 @@ end % Get names and size - obj.ensembleName = []; + if exist('name','var') + obj.name = dash.assertStrFlag(name, 'name'); + else + obj.name = []; + end obj.vectorName = sv.name; obj.nEns = 0; @@ -151,6 +160,8 @@ % User methods methods % variableNames -- Just a direct call to the field + obj = rename(obj, newName); + [V, meta] = regrid(obj, X, varName, dimOrder, d, keepSingletons); meta = variable(obj, varName, dims, type, indices); meta = dimension(obj, dim, alwaysStruct); @@ -166,5 +177,5 @@ obj = appendMembers(obj, meta2); obj = extractMembers(obj, members); end - + end \ No newline at end of file diff --git a/@ensembleMetadata/rename.m b/@ensembleMetadata/rename.m new file mode 100644 index 00000000..127929f4 --- /dev/null +++ b/@ensembleMetadata/rename.m @@ -0,0 +1,16 @@ +function[obj] = rename(obj, newName) +%% Changes the identifying ensemble name for the ensembleMetadata object. +% +% obj = obj.rename(newName) +% +% ----- Inputs ----- +% +% newName: The new identifying name for the ensemble. A string. +% +% ----- Outputs ----- +% +% obj: The updated ensembleMetadata object. + +obj.name = dash.assertStrFlag(newName, 'newName'); + +end \ No newline at end of file diff --git a/@gridfile/add.m b/@gridfile/add.m index 9805cce9..ded46439 100644 --- a/@gridfile/add.m +++ b/@gridfile/add.m @@ -1,11 +1,14 @@ function[] = add( obj, type, file, var, dims, meta, varargin ) % Adds a data source to a .grid file. % -% obj.add( type, file, var, dims, meta ) -% Add a data source. 
Notes the type of data source (NetCDF vs .mat), the -% name of the file, the name of the data variable in the file, the order of -% the dimensions for the variable, and the metadata associated with each -% dimension. +% obj.add('nc', file, var, dims, meta) +% Adds a NetCDF data source. +% +% obj.add('mat', file, var, dims, meta) +% Adds a .mat data source. +% +% obj.add('opendap', url, var, dims, meta) +% Adds an OPeNDAP data source. % % obj.add( ..., 'fill', fill ) % Specifies a fill value for the data source. When data is loaded from the @@ -38,6 +41,8 @@ % path. Use the full file path to add a file off the active path. All % file names must include the file extension. % +% url: An OPeNDAP url. A string. +% % var: The name of the variable in the source file. % % dims: The order of the dimensions of the variable in the source file. A @@ -66,7 +71,8 @@ % % absolute: A scalar logical indicating whether to save data source file % names as an absolute path (true), or as a path relative to the .grid -% file (false). Default is false. +% file (false). Default is false. Files located on a different drive or +% via an OPeNDAP url will use an absolute path. % Update the gridfile object in case the file was changed. 
obj.update; @@ -131,13 +137,13 @@ % The source metadata must exactly match a sequence of .grid metadata [inGrid, order] = ismember(value, obj.meta.(metaDims(d)), 'rows'); if any(~inGrid) - error('The %s metadata in row %.f of data source %s does not match any %s metadata in .grid file %s.', metaDims(d), find(~inGrid,1), source.file, metaDims(d), obj.file); + error('The %s metadata in row %.f of data source %s does not match any %s metadata in .grid file %s.', metaDims(d), find(~inGrid,1), source.source, metaDims(d), obj.file); elseif nRows>1 && issorted(order, 'strictdescend') - error('The %s metadata for data source %s is in the opposite order of the %s metadata in .grid file %s.', metaDims(d), source.file, metaDims(d), obj.file ); + error('The %s metadata for data source %s is in the opposite order of the %s metadata in .grid file %s.', metaDims(d), source.source, metaDims(d), obj.file ); elseif ~issorted(order, 'strictascend') - error('The %s metadata for data source %s is in a different order than the %s metadata in .grid file %s.', metaDims(d), source.file, metaDims(d), obj.file ); + error('The %s metadata for data source %s is in a different order than the %s metadata in .grid file %s.', metaDims(d), source.source, metaDims(d), obj.file ); elseif nRows>1 && ~isequal(unique(diff(order)), 1) - error('The %s metadata for data source %s skips elements that are in the %s metadata for .grid file %s.', metaDims(d), source.file, metaDims(d), obj.file ); + error('The %s metadata for data source %s skips elements that are in the %s metadata for .grid file %s.', metaDims(d), source.source, metaDims(d), obj.file ); end % Record the limits of the source data dimensions in the .grid file @@ -151,13 +157,13 @@ higher = all(dimLimit>obj.dimLimit(:,2,:), 2); overlap = all(~(lower|higher), 1); if any(overlap) - error('The data in new source file %s overlaps data in file %s, which is already in .grid file %s.', source.file, obj.source.file(find(overlap,1),:), obj.file); + 
error('The data in new source %s overlaps data source %s, which is already in .grid file %s.', source.source, obj.source.source(find(overlap,1),:), obj.file); end % Convert the dataSource object into a structure of primitives and % implement the desired filepath style source = gridfile.convertSourceToPrimitives(source); -source.file = obj.sourceFilepath(source.file, absolute); +source.source = obj.sourceFilepath(source.source, absolute); % Preallocate the length of each of the primitive fields sourceFields = fields(obj.source); diff --git a/@gridfile/buildSourcesForFiles.m b/@gridfile/buildSourcesForFiles.m index 59f387b5..9aee95ca 100644 --- a/@gridfile/buildSourcesForFiles.m +++ b/@gridfile/buildSourcesForFiles.m @@ -38,7 +38,7 @@ % Provide extra error information if the data source file is missing catch ME if strcmp(ME.identifier, "DASH:missingFile") - error('Cannot find data source file "%s". It may have been moved, renamed, or deleted. If the file was moved or renamed, see "gridfile.renameSources" to update the data source file path.', filenames(s)); + error('Cannot find data source "%s". It may have been moved, renamed, or deleted. If the file was moved or renamed, see "gridfile.renameSources" to update the data source file path.', filenames(s)); end rethrow(ME); end diff --git a/@gridfile/collectFullPaths.m b/@gridfile/collectFullPaths.m index 795b635e..2079a325 100644 --- a/@gridfile/collectFullPaths.m +++ b/@gridfile/collectFullPaths.m @@ -16,10 +16,13 @@ % paths: A string vector of absolute file paths. % Get the file names -paths = obj.collectPrimitives("file", s); +paths = obj.collectPrimitives("source", s); + +% Get the .grid file folders +gridPath = fileparts(obj.file); +gridFolders = split(gridPath, filesep); % Append the .grid file path to relative paths -gridFolders = split(obj.file, filesep); for f = 1:numel(paths) file = char(paths(f)); if file(1)=='.' 
diff --git a/@gridfile/convertSourceToPrimitives.m b/@gridfile/convertSourceToPrimitives.m index 4f77b322..302c1c2f 100644 --- a/@gridfile/convertSourceToPrimitives.m +++ b/@gridfile/convertSourceToPrimitives.m @@ -18,7 +18,7 @@ s = struct(); % Convert strings to chars -s.file = char(source.file); +s.source = char(source.source); s.var = char(source.var); s.dataType = char(source.dataType); @@ -37,6 +37,8 @@ s.type = 'nc'; elseif isa(source, 'matSource') s.type = 'mat'; +elseif isa(source, 'opendapSource') + s.type = 'opendap'; end % Post-processing fields diff --git a/@gridfile/findFileSources.m b/@gridfile/findFileSources.m index 0b41fbfd..ae1ee240 100644 --- a/@gridfile/findFileSources.m +++ b/@gridfile/findFileSources.m @@ -13,7 +13,7 @@ % matchesFile: A logical vector indicating which sources have the file name (nSource x 1). % Get the file names for the sources -sourceFile = obj.collectPrimitives("file"); +sourceFile = obj.collectPrimitives("source"); nSource = numel(sourceFile); % Remove paths diff --git a/@gridfile/info.m b/@gridfile/info.m index 8b2a6b7e..0051b860 100644 --- a/@gridfile/info.m +++ b/@gridfile/info.m @@ -105,13 +105,13 @@ % Source output structure if nargout~=0 - inputs(2:2:end) = {sources{s}.file, sources{s}.var, sourceDims, sourceSize, ... + inputs(2:2:end) = {sources{s}.source, sources{s}.var, sourceDims, sourceSize, ... 
sourceMeta, sources{s}.fill, sources{s}.range, sources{s}.convert}; sourceInfo(s) = struct(inputs{:}); % Print source to console else - [~, name, ext] = fileparts(sources{s}.file); + [~, name, ext] = fileparts(sources{s}.source); fprintf('The variable %s in file %s is a data source.\n', sources{s}.var, strcat(name, ext)); if ~isnan(sources{s}.fill) fprintf('The fill value is %s.\n', num2str(sources{s}.fill)); diff --git a/@gridfile/renameSources.m b/@gridfile/renameSources.m index 3eb89bdc..d09f23e5 100644 --- a/@gridfile/renameSources.m +++ b/@gridfile/renameSources.m @@ -113,20 +113,20 @@ % Convert the new file names to primitives. Update primitive array size newname = char(newname); newMax = size(newname, 2); -f = strcmp('file', fields(obj.source)); +f = strcmp('source', fields(obj.source)); if obj.maxLength(f) < newMax obj.maxLength(f) = newMax; - obj.source.file = gridfile.padPrimitives(obj.source.file, newMax); + obj.source.source = gridfile.padPrimitives(obj.source.source, newMax); else newname = gridfile.padPrimitives(newname, obj.maxLength(f)); end % Rename the sources. Save to .grid file -[~, k] = ismember('file', fields(obj.source)); +[~, k] = ismember('source', fields(obj.source)); for f = 1:nFile s = find(fileSources(:,f)); - obj.source.file(s,:) = newname(f,:); + obj.source.source(s,:) = newname(f,:); obj.fieldLength(s,k) = newLength(f); obj.absolutePath(s) = absolutePath(f); end diff --git a/@gridfile/repeatedLoad.m b/@gridfile/repeatedLoad.m index 0f7acec7..856c71b0 100644 --- a/@gridfile/repeatedLoad.m +++ b/@gridfile/repeatedLoad.m @@ -101,7 +101,7 @@ end % Load the data from the data source. 
Match .grid dimension order - Xsource = source.read( sourceIndices ); + [Xsource, sources{useSource(s)}] = source.read( sourceIndices ); [~, index] = ismember(source.mergedDims, obj.dims); X(outputIndices{:}) = dash.permuteDimensions(Xsource, index, false, nDims); end diff --git a/@gridfile/sourceFilepath.m b/@gridfile/sourceFilepath.m index 8c5e7620..6e1e8344 100644 --- a/@gridfile/sourceFilepath.m +++ b/@gridfile/sourceFilepath.m @@ -16,7 +16,7 @@ % path: The file path for the .grid file. if ~absolute - path = dash.relativePath( path, obj.file ); + path = dash.relativePath( path, fileparts(obj.file) ); end path = dash.unixStylePath(path); diff --git a/@stateVector/sequence.m b/@stateVector/sequence.m index b6e6bff9..0d57ef2b 100644 --- a/@stateVector/sequence.m +++ b/@stateVector/sequence.m @@ -31,8 +31,7 @@ % one dimension listed in dims. Must be in the same dimension order as % dims. % -% metadata: Metadata for the sequence. Either a vector with one element per -% sequence index or an array with one row per sequence index. +% metadata: Metadata for the sequence. An array with one row per sequence index. % % metadataCell: A cell vector. Each element contains the metadata for one % dimension listed in dims. Must be in the stame dimension order as dims diff --git a/@stateVectorVariable/sequence.m b/@stateVectorVariable/sequence.m index 12ac48bc..bf8764ff 100644 --- a/@stateVectorVariable/sequence.m +++ b/@stateVectorVariable/sequence.m @@ -26,8 +26,7 @@ % one dimension listed in dims. Must be in the same dimension order as % dims. % -% metadata: Metadata for the sequence. Either a vector with one element per -% sequence index or an array with one row per sequence index. +% metadata: Metadata for the sequence. An array with one row per sequence index. % % metadataCell: A cell vector. Each element contains the metadata for one % dimension listed in dims. 
Must be in the same dimension order as dims @@ -56,14 +55,12 @@ end obj.assertAddIndices(indices{k}, d(k), name); - % Error check metadata - errorStrs = ['array', 'row']; - if isvector(metadata{k}) - errorStrs = ['vector', 'element']; - metadata{k} = metadata{k}(:); - end + % Check metadata is allowed type. Convert cellstring to string + metadata{k} = gridfile.checkMetadataField(metadata{k}, dims(k)); + + % Metadata rows if size(metadata{k},1)~=numel(indices{k}) - metadataSizeError( obj, dims(k), errorStrs, numel(indices{k}), size(metadata{k},1) ); + metadataSizeError( obj, dims(k), numel(indices{k}), size(metadata{k},1) ); end % Update @@ -81,8 +78,8 @@ 'ensemble dimension, see "stateVector.design".'], obj.dims(bad), ... obj.name, obj.dims(bad)); end -function[] = metadataSizeError(obj, dim, strs, nIndex, nRows) -error(['When metadata is a %s, it must have one %s per sequence index (%.f), ',... - 'but the metadata for dimension %s in variable %s currently has %.f %ss.'], ... - strs(1), strs(2), nIndex, dim, obj.name, nRows, strs(2)); +function[] = metadataSizeError(obj, dim, nIndex, nRows) +error(['Sequence metadata must have one row per sequence index (%.f), ',... + 'but the metadata for dimension "%s" in variable "%s" currently has %.f rows.'], ... + nIndex, dim, obj.name, nRows); end \ No newline at end of file diff --git a/@stateVectorVariable/specifyMetadata.m b/@stateVectorVariable/specifyMetadata.m index 8b53b770..46917296 100644 --- a/@stateVectorVariable/specifyMetadata.m +++ b/@stateVectorVariable/specifyMetadata.m @@ -18,7 +18,7 @@ % obj: The updated stateVectorVariable object % Error check, dimension index. 
Cannot conflict with metadata conversion -d = obj.checkDimensions(dim, false); +[d, dim] = obj.checkDimensions(dim, false); if any(obj.convert(d)) previousMetadataError(obj, d); end diff --git a/dataSource.m b/dataSource.m index d7389b6f..0957dd82 100644 --- a/dataSource.m +++ b/dataSource.m @@ -1,12 +1,12 @@ classdef (Abstract) dataSource - %% Implements an object that can extract information from a data source - % file. dataSource is an abstract class. Concrete subclasses - % implement functionality for different types of data files. (For - % example, netCDF and .mat files). + %% Implements an object that can extract information from a data source. + % dataSource is an abstract class. Concrete subclasses + % implement functionality for different types of data sources. (For + % example, netCDF and .mat files and opendap files). properties - file; % The file name - var; % The name of the variable in the file + source; % The data source. A filename or opendap url + var; % (For hdf data sources) The name of the variable. dataType; % The type of data in the file. unmergedDims; % The order of the dimensions in the file unmergedSize; % The size of the original data in the file @@ -22,24 +22,17 @@ subclassResponsibilities = ["dataType", "unmergedSize"]; end - % Constructor and object methods. + % Constructor methods and error checking methods - function[obj] = dataSource(file, var, dims, fill, range, convert) + function[obj] = dataSource(source, sourceName, dims, fill, range, convert) %% Class constructor for a dataSource object. dataSource is an % abstract class, so this provides constructor operations necessary % for any data source. % - % obj = dataSource(file, var, dims, fill, range, convert) + % obj = dataSource(source, sourceName, dims, fill, range, convert) % % ----- Inputs ----- % - % file: The name of the data source file. A string. If only the file name is - % specified, the file must be on the active path. 
Use the full file name - % (including path) to add a file off the active path. All file names - % must include the file extension. - % - % var: The name of the variable in the source file. - % % dims: The order of the dimensions of the variable in the source file. A % string or cellstring vector. % @@ -56,11 +49,9 @@ % multiplicative constant (a). The second element specifieds the % additive constant (b). - % Error check strings, vectors - file = dash.assertStrFlag(file, "file"); - var = dash.assertStrFlag(var, "var"); + % Error check strings, vectors. + source = dash.assertStrFlag(source, sourceName); dims = dash.assertStrList(dims, "dims"); - file = dash.checkFileExists(file); % Error check the post-processing values if ~isnumeric(fill) || ~isscalar(fill) @@ -78,46 +69,145 @@ end % Save properties - obj.file = file; - obj.var = var; + obj.source = source; obj.unmergedDims = dims; obj.fill = fill; obj.range = range; obj.convert = convert; - end - function[] = checkVariable( obj, fileVariables ) - %% Returns an error message when a data source file does not contain - % the specified data source variable. + end + function[obj] = checkFile(obj) + %% Checks the data source is a file that exists + obj.source = dash.checkFileExists(obj.source); + end + function[obj] = setVariable(obj, var) + %% For hdf data sources, sets the variable name + obj.var = dash.assertStrFlag(var, 'var'); + end + function[] = checkVariableInSource(obj, sourceVariables) + %% Checks that a variable is in a data source + if ~ismember(obj.var, sourceVariables) + error('The data source "%s" does not have a %s variable', obj.source, obj.var); + end + end + end + + % Static method used to select concrete dataSource subclasses + methods (Static) + function[source] = new(type, file, var, dims, fill, range, convert) + %% Creates a new dataSource object. dataSource is an abstract + % class, so this method routes to the constructor of the + % appropriate subclass. 
% - % obj.checkVariable(fileVariables); + % source = dataSource.new(type, file, var, dims, fill, range, convert) % % ----- Inputs ----- % - % fileVariables: A list a variables in the data source file. A - % string vector or cellstring vector. + % type: The type of data source. A string. + % "nc": Use when the data source is a NetCDF file. + % "mat": Use when the data source is a .mat file. + % "opendap": Use when the data source is an OPeNDAP NetCDF + % + % file: The name of the data source file. A string. If only the file name is + % specified, the file must be on the active path. Use the full file name + % (including path) to add a file off the active path. All file names + % must include the file extension. + % + % var: The name of the variable in the source file. + % + % dims: The order of the dimensions of the variable in the source file. A + % string or cellstring vector. + % + % fill: A fill value. Must be a scalar. When data is loaded from the file, + % values matching fill are converted to NaN. + % + % range: A valid range. A two element vector. The first element is the + % lower bound of the valid range. The second element is the upper bound + % of the valid range. When data is loaded from the file, values outside + % of the range are converted to NaN. + % + % convert: Applies a linear transformation of form: Y = aX + b + % to loaded data. A two element vector. The first element specifies the + % multiplicative constant (a). The second element specifies the + % additive constant (b). + + % Error check type + type = dash.assertStrFlag(type, 'type'); + + % Set defaults for optional values + if ~exist('fill','var') || isempty(fill) + fill = NaN; + end + if ~exist('range','var') || isempty(range) + range = [-Inf Inf]; + end + if ~exist('convert','var') || isempty(convert) + convert = [1 0]; + end + + % Create the concrete dataSource object. This will error check + % and get the size of the raw unmerged data in the source. 
+ if strcmpi(type,'nc') + source = ncSource(file, 'file', var, dims, fill, range, convert); + elseif strcmpi(type, 'mat') + source = matSource(file, var, dims, fill, range, convert); + elseif strcmpi(type, 'opendap') + source = opendapSource(file, var, dims, fill, range, convert); + else + error('type must be one of the strings "nc", "mat", or "opendap".'); + end + + % Check that the subclass constructor set all fields for which + % it is responsible + fields = dataSource.subclassResponsibilities; + for f = 1:numel(fields) + if isempty( source.(fields(f)) ) + error('The dataSource subclass constructor did not set the "%s" property.', fields(f)); + end + end + + % Ensure all non-trailing singleton dimensions are named. Pad + % the unmerged size for any named trailing singletons. + nDims = numel(source.unmergedDims); + minimumDims = max( [1, find(source.unmergedSize~=1,1,'last')] ); + if nDims < minimumDims + error('The first %.f dimensions of variable %s in file %s require names, but dims only contains %.f elements',minimumDims, source.var, source.source, nDims ); + elseif numel(source.unmergedSize) < nDims + source.unmergedSize( end+1:nDims ) = 1; + end - infile = ismember(obj.var, fileVariables); - if ~infile - error('File %s does not contain a %s variable.', obj.file, obj.var); - end - end - function[X] = read( obj, mergedIndices ) - %% Reads values from a data source. - % - % X = obj.read( mergedIndices ) - % - % ----- Inputs ----- - % - % mergedIndices: A cell array. Each element contains the indices to read - % for one dimension. Dimensions must be in the same order as the merged - % dimensions. Indices should be linear indices along the dimension. - % - % ----- Outputs ----- - % - % X: The values read from the data source file. Dimensions are in - % the order of the merged dimensions. 
- + % Get the merge map and merged data size + source.mergedDims = unique(source.unmergedDims, 'stable'); + nUniqueDims = numel(source.mergedDims); + source.merge = NaN(1,nDims); + source.mergedSize = NaN(1,nUniqueDims); + + for d = 1:nUniqueDims + isdim = find( strcmp(source.mergedDims(d), source.unmergedDims) ); + source.merge(isdim) = d; + source.mergedSize(d) = prod( source.unmergedSize(isdim) ); + end + end + end + + % Interface used to read data from a dataSource + methods + function[X, obj] = read( obj, mergedIndices ) + %% Reads values from a data source. + % + % X = obj.read( mergedIndices ) + % + % ----- Inputs ----- + % + % mergedIndices: A cell array. Each element contains the indices to read + % for one dimension. Dimensions must be in the same order as the merged + % dimensions. Indices should be linear indices along the dimension. + % + % ----- Outputs ----- + % + % X: The values read from the data source file. Dimensions are in + % the order of the merged dimensions. + % Preallocate nMerged = numel(obj.mergedDims); nUnmerged = numel(obj.unmergedDims); @@ -138,8 +228,9 @@ [unmergedIndices{isdim}] = ind2sub(siz, mergedIndices{d}); end - % Currently, all data source (.mat and netCDF) can only load equally spaced + % Currently, all data source (.mat and netCDF based) can only load equally spaced % values. Get equally spaced indices to load from each source. + % (This may eventually be merged into hdfSource). 
for d = 1:nUnmerged uniqueIndices = unique(sort(unmergedIndices{d})); loadIndices{d} = dash.equallySpacedIndices(uniqueIndices); @@ -156,11 +247,11 @@ end % Load the values from the data source - X = obj.load( loadIndices ); + [X, obj] = obj.load( loadIndices ); % Track which dimensions become singletons via merging remove = NaN(1, nUnmerged-nMerged); - + % Permute dimensions being merged to the front for d = 1:nMerged order = 1:nUnmerged; @@ -174,14 +265,14 @@ siz = size(X); nDim = numel(isdim); siz(end+1:nDim) = 1; - + newSize = [prod(siz(1:nDim)), ones(1,nDim-1), siz(nDim+1:end)]; X = reshape(X, newSize); % Unpermute and note if any dimensions should be removed [~, reorder] = sort(order); X = permute( X, reorder ); - + k = find(isnan(remove), 1, 'first'); remove(k:k+nDim-2) = isdim(2:end); @@ -199,128 +290,32 @@ dimOrder = 1:nUnmerged; order = [dimOrder(~ismember(dimOrder,remove)), remove]; X = permute(X, order); - + % Remove any unrequested data elements that were loaded to % fulfill equal spacing requirements X = X(keepElements{:}); - + % Convert fill value to NaN if ~isnan(obj.fill) X(X==obj.fill) = NaN; end - + % Convert values outside the valid range to NaN if ~isequal(obj.range, [-Inf Inf]) valid = (X>=obj.range(1)) & (X<=obj.range(2)); X(~valid) = NaN; end - + % Apply linear transformation if ~isequal(obj.convert, [1 0]) X = obj.convert(1)*X + obj.convert(2); end end end - - % Create new dataSource subclass - methods (Static) - function[source] = new(type, file, var, dims, fill, range, convert) - %% Creates a new dataSource object. dataSource is an abstract - % class, so this method routes to the constructor of the - % appropriate subclass. - % - % source = dataSource.new(type, file, var, dims, fill, range, convert) - % - % ----- Inputs ----- - % - % type: The type of data source. A string. - % "nc": Use when the data source is a NetCDF file. - % "mat": Use when the data source is a .mat file. - % - % file: The name of the data source file. A string. 
If only the file name is - % specified, the file must be on the active path. Use the full file name - % (including path) to add a file off the active path. All file names - % must include the file extension. - % - % var: The name of the variable in the source file. - % - % dims: The order of the dimensions of the variable in the source file. A - % string or cellstring vector. - % - % fill: A fill value. Must be a scalar. When data is loaded from the file, - % values matching fill are converted to NaN. - % - % range: A valid range. A two element vector. The first element is the - % lower bound of the valid range. The second elements is the upper bound - % of the valid range. When data is loaded from the file, values outside - % of the range are converted to NaN. - % - % convert: Applies a linear transformation of form: Y = aX + b - % to loaded data. A two element vector. The first element specifies the - % multiplicative constant (a). The second element specifieds the - % additive constant (b). - - % Check the type is allowed - if ~dash.isstrflag(type) || ~ismember(type, ["nc","mat"]) - error('type must be either the string "nc" or "mat".'); - end - - % Set defaults for optional values - if ~exist('fill','var') || isempty(fill) - fill = NaN; - end - if ~exist('range','var') || isempty(range) - range = [-Inf Inf]; - end - if ~exist('convert','var') || isempty(convert) - convert = [1 0]; - end - - % Create the subclass dataSource object. This will error check - % file, var, and dims and get the size of the raw unmerged data - % in the source. 
- if strcmp(type,'nc') - source = ncSource(file, var, dims, fill, range, convert); - elseif strcmp(type, 'mat') - source = matSource(file, var, dims, fill, range, convert); - end - - % Check that the subclass constructor set all fields for which - % it is responsible - fields = dataSource.subclassResponsibilities; - for f = 1:numel(fields) - if isempty( source.(fields(f)) ) - error('The dataSource subclass constructor did not set the "%s" property.', fields(f)); - end - end - - % Ensure all non-trailing singleton dimensions are named. Pad - % the unmerged size for any named trailing singletons. - nDims = numel(source.unmergedDims); - minimumDims = max( [1, find(source.unmergedSize~=1,1,'last')] ); - if nDims < minimumDims - error('The first %.f dimensions of variable %s in file %s require names, but dims only contains %.f elements',minimumDims, obj.var, obj.file, numel(obj.dims) ); - elseif numel(source.unmergedSize) < nDims - source.unmergedSize( end+1:nDims ) = 1; - end - - % Get the merge map and merged data size - source.mergedDims = unique(source.unmergedDims, 'stable'); - nUniqueDims = numel(source.mergedDims); - source.merge = NaN(1,nDims); - source.mergedSize = NaN(1,nUniqueDims); - - for d = 1:nUniqueDims - isdim = find( strcmp(source.mergedDims(d), source.unmergedDims) ); - source.merge(isdim) = d; - source.mergedSize(d) = prod( source.unmergedSize(isdim) ); - end - end - end - - % Subclasses must load values from data source files + + % Concrete subclasses must be able to load data from requested indices methods (Abstract) - X = load(obj, indices); + [X, obj] = load(obj, indices); end end \ No newline at end of file diff --git a/matSource.m b/matSource.m index 5c7481db..af829ee4 100644 --- a/matSource.m +++ b/matSource.m @@ -1,5 +1,5 @@ classdef matSource < dataSource - %% Implements a data source object that can read values from .mat files. 
+ %% Reads data from a .mat file data source properties m; % A matfile object @@ -11,35 +11,41 @@ methods function obj = matSource(file, var, dims, fill, range, convert) - %% Creates a new matSource object. + %% Creates a new matSource object. Checks the matfile is valid + % and contains the required variable % % obj = matSource(file, var, dims, fill, range, convert) % % ----- Inputs ----- % - % See the documentation in dataSource.new + % file: The name of the .mat file. A string. + % + % var: The name of the variable in the .mat file. A string. + % + % dims, fill, range, convert: See the documentation in dataSource % % ----- Outputs ----- % - % obj: A new matSource object. + % obj: The new matSource object - % First call the data source constructor for initial error - % checking and to save the input args - obj@dataSource(file, var, dims, fill, range, convert); + % Constructor and error checks + obj@dataSource(file, 'file', dims, fill, range, convert); + obj = obj.checkFile; + obj = obj.setVariable(var); - % Matfile access is via chars + % Use chars to access matfile variables obj.var = char(obj.var); - % Check that the file is a matfile + % Check the file is a matfile try - obj.m = matfile(file); + obj.m = matfile(obj.source); catch - error('The file %s is not a valid .mat file.', file); + error('The file %s is not a valid .mat file.', obj.source); end % Check the variable is in the file fileVariables = string(who(obj.m)); - obj.checkVariable( fileVariables ); + obj.checkVariableInSource(fileVariables); % Get the data type and size of the array info = whos(obj.m, obj.var); @@ -48,17 +54,20 @@ % Warn the user if this is not v7.3 warn = warning('query', obj.warnID); - warning('error', obj.warnID); %#ok + warning('error', obj.warnID); firstIndex = repmat({1}, [1, numel(obj.unmergedSize)]); try obj.m.(obj.var)(firstIndex{:}); catch - warning('File %s is not a version 7.3 .mat file. Version 7.3 is STRONGLY recommended for use with dash. 
Consider saving .mat files with the ''-v7.3'' flag or use dash.convertToV7_3 to convert existing .mat files to a v7.3 format. For more details, see the Matlab documention on "save" and "MAT-File versions".', obj.file); + warning(['File %s is not a version 7.3 .mat file. Version 7.3 ',... + 'is STRONGLY recommended for use with DASH. Consider saving .mat files ',... + 'with the ''-v7.3'' flag or use dash.convertToV7_3 to convert existing .mat ',... + 'files to a v7.3 format. For more details, see the Matlab documention on "save" ',... + 'and "MAT-File versions".'], obj.source); end - warning( warn.state, obj.warnID ); - - end - function[X] = load( obj, indices ) + warning( warn.state, obj.warnID ); + end + function[X, obj] = load( obj, indices ) %% Loads data from a .mat data source. % % X = obj.load(indices) @@ -74,9 +83,11 @@ % % X: The data located at the requested indices. - % Disable the partial load warning and load + % Disable the partial load warning warn = warning('query', obj.warnID); warning('off', obj.warnID); + + % Load the data X = obj.m.(obj.var)(indices{:}); % Restore the original warning state diff --git a/ncSource.m b/ncSource.m index 43ee5de1..1885aebd 100644 --- a/ncSource.m +++ b/ncSource.m @@ -1,48 +1,41 @@ classdef ncSource < dataSource - %% Implements a data source object that can read values from a netCDF file. + %% Used to read data from source based on a NetCDF format. Includes + % local NetCDF files and OPeNDAP requests. properties - nDims; % The number of defined dimensions for the variable in the netCDF + nDims; % The number of dimensions recorded in the NetCDF end methods - function obj = ncSource(file, var, dims, fill, range, convert) - %% Creates a new ncSource object. 
- % - % obj = ncSource(file, var, dims, fill, range, convert) - % - % ----- Inputs ----- - % - % See the documentation in dataSource.new - % - % ----- Outputs ----- - % - % obj: A new ncSource object + function obj = ncSource(source, sourceName, var, dims, fill, range, convert) - % First call the data source constructor for initial error - % checking and to save the input args - obj@dataSource(file, var, dims, fill, range, convert); + % Constructor and error checking + obj@dataSource(source, sourceName, dims, fill, range, convert); + obj = obj.setVariable(var); + if strcmp(sourceName, 'file') + obj = obj.checkFile; + end - % Check the file is actually a NetCDF + % Check the source is actually a NetCDF try - info = ncinfo( obj.file ); + info = ncinfo(obj.source); catch - error('The file %s is not a valid NetCDF file.', obj.file ); + error('The data source "%s" is not a valid NetCDF file.', obj.source); end - % Check that the variable is in the file + % Check the variable is in the file. Get the list of variables. nVars = numel(info.Variables); fileVariables = cell(nVars,1); [fileVariables{:}] = deal( info.Variables.Name ); - obj.checkVariable( fileVariables ); + obj.checkVariableInSource(fileVariables); % Get the data type and size of the array [~,v] = ismember(obj.var, fileVariables); obj.dataType = info.Variables(v).Datatype; obj.unmergedSize = info.Variables(v).Size; obj.nDims = numel(info.Variables(v).Dimensions); - end - function[X] = load(obj, indices) + end + function[X, obj] = load(obj, indices) %% Loads data from a netCDF data source. 
% % X = obj.load(indices) @@ -73,8 +66,8 @@ end % Load the data - X = ncread( obj.file, obj.var, start, count, stride ); + X = ncread( obj.source, obj.var, start, count, stride ); end end -end \ No newline at end of file +end diff --git a/opendapSource.m b/opendapSource.m new file mode 100644 index 00000000..e3848830 --- /dev/null +++ b/opendapSource.m @@ -0,0 +1,72 @@ +classdef opendapSource < ncSource + %% Reads data accessed via an OPeNDAP url. Attempts to load and save + % the entire variable when using repeated loads to increase speed. + + properties + X; % The loaded and saved dataset + attemptFullLoad; % Whether to attempt to load the entire dataset + saved; % Whether the dataset is currently saved + end + + methods + function obj = opendapSource(url, var, dims, fill, range, convert) + %% Creates a new opendapSource. Checks the url links to a valid + % netcdf file that contains the specified variable. + % + % obj = opendapSource(url, var, dims, fill, range, convert) + % + % ----- Inputs ----- + % + % url: The OPeNDAP url. A string. + % + % var: The name of the variable in the OPeNDAP NetCDF. A string + % + % dims, fill, range, convert: See the documentation in dataSource + % + % ----- Outputs ----- + % + % obj: The new opendapSource object + + % Superclass constructors + obj@ncSource(url, false, var, dims, fill, range, convert); + + % Track status of loading the entire dataset + obj.attemptFullLoad = true; + obj.saved = false; + end + function[X, obj] = load(obj, indices) + %% Load data from an OPeNDAP data source. + % + % X = obj.load(indices) + % + % ----- Inputs ----- + % + % indices: A cell array. Each element contains the linear + % indices to load for a dimension. Indices must be equally + % spaced and monotonically increasing. Dimensions must be in + % the order of the unmerged dimensions. + % + % ----- Outputs ----- + % + % X: The data located at the requested indices. 
+ + % Attempt to load the entire dataset once + if obj.attemptFullLoad + try + obj.X = ncread(obj.source, obj.var); + obj.saved = true; + catch + end + obj.attemptFullLoad = false; + end + + % If saved, load directly. Otherwise, use ncread + if obj.saved + X = obj.X(indices{:}); + else + [X, obj] = load@ncSource(obj, indices); + end + end + end + +end \ No newline at end of file