diff --git a/all.sas b/all.sas index 2fa2d7fb..42e8b8bf 100644 --- a/all.sas +++ b/all.sas @@ -1904,6 +1904,113 @@ Usage: %mend; /** + @file mp_csv2ds.sas + @brief Efficient import of arbitrary CSV using a dataset as template + @details Used to import relevant columns from a large CSV using + a dataset to provide the types and lengths. Assumes that a header + row is provided, and datarows start on line 2. Extra columns in + both the CSV and base dataset are ignored. + + Usage: + + filename mycsv temp; + data _null_; + file mycsv; + put 'name,age,nickname'; + put 'John,48,Jonny'; + put 'Jennifer,23,Jen'; + run; + + %mp_csv2ds(inref=mycsv,outds=myds,baseds=sashelp.class) + + + @param inref= fileref to the CSV + @param outds= output ds. Could also be a view (eg `outds=myds/view=myds`) + @param baseds= Template dataset on which to create the input statement. + Is used to determine types, lengths, and any informats. + + @version 9.2 + @author Allan Bowe +**/ + +%macro mp_csv2ds(inref=0,outds=0,baseds=0); +%if &inref=0 %then %do; + %put %str(ERR)OR: the INREF variable must be provided; + %let syscc=4; + %abort; +%end; +%if &outds=0 %then %do; + %put %str(ERR)OR: the OUTDS variable must be provided; + %let syscc=4; + %return; +%end; +%if &baseds=0 %then %do; + %put %str(ERR)OR: the BASEDS variable must be provided; + %let syscc=4; + %return; +%end; + +/* get the variables in the CSV */ +data _data_; + infile &inref; + input; + length name $32; + do i=1 to countc(_infile_,',')+1; + name=upcase(scan(_infile_,i,',')); + output; + end; + stop; +run; +%local csv_vars;%let csv_vars=&syslast; + +/* get the variables in the dataset */ +proc contents noprint data=&baseds + out=_data_ (keep=name type length format: informat); +run; +%local base_vars; %let base_vars=&syslast; + +proc sql undo_policy=none; +create table &csv_vars as + select a.* + ,b.type + ,b.length + ,b.format + ,b.formatd + ,b.formatl + ,b.informat + from &csv_vars a + left join &base_vars b + on a.name=upcase(b.name) + order by i; + +/* prepare the input statement */ +%local instat dropvars; +data _null_; + set &syslast end=last; + length in dropvars $32767; + retain in dropvars; + if missing(type) then do; + informat='$1.'; + dropvars=catx(' ',dropvars,name); + end; + else if missing(informat) then do; + if type=1 then informat='best.'; + else informat=cats('$',length,'.'); + end; + in=catx(' ',in,name,':',informat); + if last then do; + call symputx('instat',in,'l'); + call symputx('dropvars',dropvars,'l'); + end; +run; + +data &outds; + infile &inref dsd firstobs=2; + input &instat; + drop &dropvars; +run; + +%mend;/** @file mp_deleteconstraints.sas @brief Delete constraionts @details Takes the output from mp_getconstraints.sas as input diff --git a/base/mp_csv2ds.sas b/base/mp_csv2ds.sas new file mode 100644 index 00000000..bbc4fdc5 --- /dev/null +++ b/base/mp_csv2ds.sas @@ -0,0 +1,109 @@ +/** + @file mp_csv2ds.sas + @brief Efficient import of arbitrary CSV using a dataset as template + @details Used to import relevant columns from a large CSV using + a dataset to provide the types and lengths. Assumes that a header + row is provided, and datarows start on line 2. Extra columns in + both the CSV and base dataset are ignored. + + Usage: + + filename mycsv temp; + data _null_; + file mycsv; + put 'name,age,nickname'; + put 'John,48,Jonny'; + put 'Jennifer,23,Jen'; + run; + + %mp_csv2ds(inref=mycsv,outds=myds,baseds=sashelp.class) + + + @param inref= fileref to the CSV + @param outds= output ds. Could also be a view (eg `outds=myds/view=myds`) + @param baseds= Template dataset on which to create the input statement. + Is used to determine types, lengths, and any informats. + + @version 9.2 + @author Allan Bowe +**/ + +%macro mp_csv2ds(inref=0,outds=0,baseds=0); +%if &inref=0 %then %do; + %put %str(ERR)OR: the INREF variable must be provided; + %let syscc=4; + %abort; +%end; +%if &outds=0 %then %do; + %put %str(ERR)OR: the OUTDS variable must be provided; + %let syscc=4; + %return; +%end; +%if &baseds=0 %then %do; + %put %str(ERR)OR: the BASEDS variable must be provided; + %let syscc=4; + %return; +%end; + +/* get the variables in the CSV */ +data _data_; + infile &inref; + input; + length name $32; + do i=1 to countc(_infile_,',')+1; + name=upcase(scan(_infile_,i,',')); + output; + end; + stop; +run; +%local csv_vars;%let csv_vars=&syslast; + +/* get the variables in the dataset */ +proc contents noprint data=&baseds + out=_data_ (keep=name type length format: informat); +run; +%local base_vars; %let base_vars=&syslast; + +proc sql undo_policy=none; +create table &csv_vars as + select a.* + ,b.type + ,b.length + ,b.format + ,b.formatd + ,b.formatl + ,b.informat + from &csv_vars a + left join &base_vars b + on a.name=upcase(b.name) + order by i; + +/* prepare the input statement */ +%local instat dropvars; +data _null_; + set &syslast end=last; + length in dropvars $32767; + retain in dropvars; + if missing(type) then do; + informat='$1.'; + dropvars=catx(' ',dropvars,name); + end; + else if missing(informat) then do; + if type=1 then informat='best.'; + else informat=cats('$',length,'.'); + end; + in=catx(' ',in,name,':',informat); + if last then do; + call symputx('instat',in,'l'); + call symputx('dropvars',dropvars,'l'); + end; +run; + +/* import the CSV */ +data &outds; + infile &inref dsd firstobs=2; + input &instat; + drop &dropvars; +run; + +%mend; \ No newline at end of file