Skip to content

Commit

Permalink
feat: mp_csv2ds macro for importing a CSV using a SAS table to provid…
Browse files Browse the repository at this point in the history
…e a template (eg for lengths / types etc)
  • Loading branch information
allanbowe committed Nov 2, 2020
1 parent 15687be commit cc7cc55
Show file tree
Hide file tree
Showing 2 changed files with 216 additions and 0 deletions.
107 changes: 107 additions & 0 deletions all.sas
Original file line number Diff line number Diff line change
Expand Up @@ -1904,6 +1904,113 @@ Usage:
%mend;
/**
  @file mp_csv2ds.sas
  @brief Efficient import of arbitrary CSV using a dataset as template
  @details Used to import relevant columns from a large CSV using
  a dataset to provide the types and lengths.  Assumes that a header
  row is provided, and datarows start on line 2.  Extra columns in
  both the CSV and base dataset are ignored.

  Where a template variable carries an informat, the informat name, width
  and decimals are all taken from the template (eg `DATE9.`, `$CHAR20.`).

  Usage:

      filename mycsv temp;
      data _null_;
        file mycsv;
        put 'name,age,nickname';
        put 'John,48,Jonny';
        put 'Jennifer,23,Jen';
      run;

      %mp_csv2ds(inref=mycsv,outds=myds,baseds=sashelp.class)

  @param inref= fileref to the CSV
  @param outds= output ds. Could also be a view (eg `outds=myds/view=myds`)
  @param baseds= Template dataset on which to create the input statement.
    Is used to determine types, lengths, and any informats.

  @version 9.2
  @author Allan Bowe
**/
%macro mp_csv2ds(inref=0,outds=0,baseds=0);

  /* all three parameters are mandatory - 0 is the "not provided" default */
  %if &inref=0 %then %do;
    %put %str(ERR)OR: the INREF variable must be provided;
    %let syscc=4;
    /* exit this macro only (same as the checks below) */
    %return;
  %end;
  %if &outds=0 %then %do;
    %put %str(ERR)OR: the OUTDS variable must be provided;
    %let syscc=4;
    %return;
  %end;
  %if &baseds=0 %then %do;
    %put %str(ERR)OR: the BASEDS variable must be provided;
    %let syscc=4;
    %return;
  %end;

  /* get the variables in the CSV: one row per header column, i = position */
  data _data_;
    infile &inref;
    input;
    length name $32;
    do i=1 to countc(_infile_,',')+1;
      name=upcase(scan(_infile_,i,','));
      output;
    end;
    /* only the header row is needed */
    stop;
  run;
  %local csv_vars;%let csv_vars=&syslast;

  /* get the variables in the dataset.  INFORMAT holds the informat NAME */
  /* only, the width and decimals are held in INFORML and INFORMD        */
  proc contents noprint data=&baseds
    out=_data_ (keep=name type length format formatl formatd
      informat informl informd);
  run;
  %local base_vars; %let base_vars=&syslast;

  /* attach the template attributes to each CSV column, keeping CSV order */
  proc sql undo_policy=none;
  create table &csv_vars as
    select a.*
      ,b.type
      ,b.length
      ,b.format
      ,b.formatd
      ,b.formatl
      ,b.informat
      ,b.informl
      ,b.informd
    from &csv_vars a
    left join &base_vars b
    on a.name=upcase(b.name)
    order by i;
  quit;

  /* prepare the input statement */
  %local instat dropvars;
  data _null_;
    set &csv_vars end=last;
    length in dropvars $32767;
    retain in dropvars;
    if missing(type) then do;
      /* column is not in the template: read a single char, then drop it */
      informat='$1.';
      dropvars=catx(' ',dropvars,name);
    end;
    else if missing(informat) then do;
      /* no named informat on the template: derive one from type / length */
      if type=1 then informat='best.';
      else informat=cats('$',length,'.');
    end;
    else do;
      /* rebuild NAMEw.d - without the period SAS would treat the bare */
      /* informat name as another variable in the INPUT statement      */
      if informl>0 then informat=cats(informat,informl,'.');
      else informat=cats(informat,'.');
      if informd>0 then informat=cats(informat,informd);
    end;
    /* colon modifier = list input using the supplied informat */
    in=catx(' ',in,name,':',informat);
    if last then do;
      call symputx('instat',in,'l');
      call symputx('dropvars',dropvars,'l');
    end;
  run;

  /* import the CSV, skipping the header row */
  data &outds;
    infile &inref dsd firstobs=2;
    input &instat;
    drop &dropvars;
  run;

%mend;
/**
@file mp_deleteconstraints.sas
@brief Delete constraints
@details Takes the output from mp_getconstraints.sas as input
Expand Down
109 changes: 109 additions & 0 deletions base/mp_csv2ds.sas
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/**
  @file mp_csv2ds.sas
  @brief Efficient import of arbitrary CSV using a dataset as template
  @details Used to import relevant columns from a large CSV using
  a dataset to provide the types and lengths.  Assumes that a header
  row is provided, and datarows start on line 2.  Extra columns in
  both the CSV and base dataset are ignored.

  Where a template variable carries an informat, the informat name, width
  and decimals are all taken from the template (eg `DATE9.`, `$CHAR20.`).

  Usage:

      filename mycsv temp;
      data _null_;
        file mycsv;
        put 'name,age,nickname';
        put 'John,48,Jonny';
        put 'Jennifer,23,Jen';
      run;

      %mp_csv2ds(inref=mycsv,outds=myds,baseds=sashelp.class)

  @param inref= fileref to the CSV
  @param outds= output ds. Could also be a view (eg `outds=myds/view=myds`)
  @param baseds= Template dataset on which to create the input statement.
    Is used to determine types, lengths, and any informats.

  @version 9.2
  @author Allan Bowe
**/

%macro mp_csv2ds(inref=0,outds=0,baseds=0);

  /* all three parameters are mandatory - 0 is the "not provided" default */
  %if &inref=0 %then %do;
    %put %str(ERR)OR: the INREF variable must be provided;
    %let syscc=4;
    /* exit this macro only (same as the checks below) */
    %return;
  %end;
  %if &outds=0 %then %do;
    %put %str(ERR)OR: the OUTDS variable must be provided;
    %let syscc=4;
    %return;
  %end;
  %if &baseds=0 %then %do;
    %put %str(ERR)OR: the BASEDS variable must be provided;
    %let syscc=4;
    %return;
  %end;

  /* get the variables in the CSV: one row per header column, i = position */
  data _data_;
    infile &inref;
    input;
    length name $32;
    do i=1 to countc(_infile_,',')+1;
      name=upcase(scan(_infile_,i,','));
      output;
    end;
    /* only the header row is needed */
    stop;
  run;
  %local csv_vars;%let csv_vars=&syslast;

  /* get the variables in the dataset.  INFORMAT holds the informat NAME */
  /* only, the width and decimals are held in INFORML and INFORMD        */
  proc contents noprint data=&baseds
    out=_data_ (keep=name type length format formatl formatd
      informat informl informd);
  run;
  %local base_vars; %let base_vars=&syslast;

  /* attach the template attributes to each CSV column, keeping CSV order */
  proc sql undo_policy=none;
  create table &csv_vars as
    select a.*
      ,b.type
      ,b.length
      ,b.format
      ,b.formatd
      ,b.formatl
      ,b.informat
      ,b.informl
      ,b.informd
    from &csv_vars a
    left join &base_vars b
    on a.name=upcase(b.name)
    order by i;
  quit;

  /* prepare the input statement */
  %local instat dropvars;
  data _null_;
    set &csv_vars end=last;
    length in dropvars $32767;
    retain in dropvars;
    if missing(type) then do;
      /* column is not in the template: read a single char, then drop it */
      informat='$1.';
      dropvars=catx(' ',dropvars,name);
    end;
    else if missing(informat) then do;
      /* no named informat on the template: derive one from type / length */
      if type=1 then informat='best.';
      else informat=cats('$',length,'.');
    end;
    else do;
      /* rebuild NAMEw.d - without the period SAS would treat the bare */
      /* informat name as another variable in the INPUT statement      */
      if informl>0 then informat=cats(informat,informl,'.');
      else informat=cats(informat,'.');
      if informd>0 then informat=cats(informat,informd);
    end;
    /* colon modifier = list input using the supplied informat */
    in=catx(' ',in,name,':',informat);
    if last then do;
      call symputx('instat',in,'l');
      call symputx('dropvars',dropvars,'l');
    end;
  run;

  /* import the CSV, skipping the header row */
  data &outds;
    infile &inref dsd firstobs=2;
    input &instat;
    drop &dropvars;
  run;

%mend;

0 comments on commit cc7cc55

Please sign in to comment.