-
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: mp_csv2ds macro for importing a CSV using a SAS table to provide a template (eg for lengths / types etc)
- Loading branch information
Showing
2 changed files
with
216 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
/**
  @file mp_csv2ds.sas
  @brief Efficient import of arbitrary CSV using a dataset as template
  @details Used to import relevant columns from a large CSV using
  a dataset to provide the types and lengths.  Assumes that a header
  row is provided, and datarows start on line 2.  Extra columns in
  both the CSV and base dataset are ignored.

  The header row is split on commas and each column name is upcased and
  matched (case-insensitively) against the variables of the template
  dataset.  CSV columns with no match are read with a `$1.` informat and
  dropped from the output.  Matched columns are read with the informat
  stored on the template variable; when none is stored, `best.` is used for
  numerics and `$<length>.` for characters.

  If a mandatory parameter is not supplied, a message is written to the log,
  SYSCC is set to 4 and the macro returns without creating any output.

  Usage:

      filename mycsv temp;
      data _null_;
        file mycsv;
        put 'name,age,nickname';
        put 'John,48,Jonny';
        put 'Jennifer,23,Jen';
      run;

      %mp_csv2ds(inref=mycsv,outds=myds,baseds=sashelp.class)

  @param inref= fileref to the CSV
  @param outds= output ds. Could also be a view (eg `outds=myds/view=myds`)
  @param baseds= Template dataset on which to create the input statement.
    Is used to determine types, lengths, and any informats.

  @version 9.2
  @author Allan Bowe
**/

%macro mp_csv2ds(inref=0,outds=0,baseds=0);

  /* all three parameters are mandatory - each check fails the same way */
  %if &inref=0 %then %do;
    %put %str(ERR)OR: the INREF variable must be provided;
    %let syscc=4;
    %return;
  %end;
  %if &outds=0 %then %do;
    %put %str(ERR)OR: the OUTDS variable must be provided;
    %let syscc=4;
    %return;
  %end;
  %if &baseds=0 %then %do;
    %put %str(ERR)OR: the BASEDS variable must be provided;
    %let syscc=4;
    %return;
  %end;

  /* get the variables in the CSV (one row per header column, i=position) */
  data _data_;
    infile &inref;
    input;
    length name $32;
    do i=1 to countc(_infile_,',')+1;
      name=upcase(scan(_infile_,i,','));
      output;
    end;
    stop; /* only the header line is needed */
  run;
  /* _data_ creates an auto-named work table, capture its name */
  %local csv_vars;%let csv_vars=&syslast;

  /* get the variables in the dataset */
  /* informl / informd hold the informat width and decimals */
  proc contents noprint data=&baseds
    out=_data_ (keep=name type length informat informl informd);
  run;
  %local base_vars; %let base_vars=&syslast;

  /* attach template attributes to each CSV column, preserving CSV order */
  /* undo_policy=none avoids the note about overwriting a source table */
  proc sql undo_policy=none;
  create table &csv_vars as
    select a.*
      ,b.type
      ,b.length
      ,b.informat
      ,b.informl
      ,b.informd
    from &csv_vars a
    left join &base_vars b
    on a.name=upcase(b.name)
    order by i;
  quit;

  /* prepare the input statement */
  %local instat dropvars;
  data _null_;
    set &csv_vars end=last;
    length in dropvars $32767 width decimals $8;
    retain in dropvars;
    if missing(type) then do;
      /* column is not in the template: read a minimal value, drop later */
      informat='$1.';
      dropvars=catx(' ',dropvars,name);
    end;
    else if missing(informat) then do;
      if type=1 then informat='best.';
      else informat=cats('$',length,'.');
    end;
    else do;
      /* PROC CONTENTS stores the informat NAME only (eg DATE, not DATE9.) */
      /* so width, period and decimals must be re-attached to be valid */
      width=' ';
      decimals=' ';
      if informl>0 then width=cats(informl);
      /* char informat without width: use variable length to avoid truncation */
      else if type=2 then width=cats(length);
      if informd>0 then decimals=cats(informd);
      informat=cats(informat,width,'.',decimals);
    end;
    /* colon modifier = list input using the informat, up to next delimiter */
    in=catx(' ',in,name,':',informat);
    if last then do;
      call symputx('instat',in,'l');
      call symputx('dropvars',dropvars,'l');
    end;
  run;

  /* import the CSV */
  data &outds;
    infile &inref dsd firstobs=2;
    input &instat;
    drop &dropvars;
  run;

%mend;