* Conditional Processing Key.sas;
* ============================================================================;
* Lab: Conditional Processing
* This code is posted for your benefit. However, I highly recommend that you
* practice typing your own SAS programs as well. With the SAS programming
* language, as with all new languages, immersion seems to be the best way to
* learn.
* ============================================================================;
* Task 1.
* Import the file called Class Survey 5141.csv as a new SAS data set called
* classurv1 in a new SAS library called classurv.;
* ============================================================================;
* Point the classurv library at the folder that holds the survey file;
libname classurv "C:\Users\mbc0022\Desktop";

* Read the CSV into classurv.classurv1, overwriting any earlier copy.
* DATAROW = 3 starts reading observations at the third row of the file
* (presumably row 2 is a second header row - confirm against the CSV);
proc import
        datafile = "C:\Users\mbc0022\Desktop\Class Survey 5141.csv"
        out      = classurv.classurv1
        dbms     = csv
        replace;
    datarow = 3;
run;
* Task 2.
* Using the KEEP statement in a DATA step, subset the data to contain
* only the variables for the following questions (HINT: Some of the variable
* names in the survey data set may be slightly different than the variable names
* listed in the codebook): Insurance, personal doctor, veteran, marital status,
* employment, salary, year born, month born, gender.;
* ============================================================================;
* Subset classurv1 to the nine survey questions used in the rest of the lab;
data classurv.classurv2;
    keep
        q13      /* insurance        */
        q14      /* personal doctor  */
        q17      /* veteran          */
        q18      /* marital status   */
        q20      /* employment       */
        q21_1    /* salary           */
        q24      /* year born        */
        q25      /* month born       */
        q26      /* gender           */
    ;
    set classurv.classurv1;
run;
* Task 3.
* Create user-defined formats for the values of all categorical variables in the
* data set (i.e. NOT salary or year of birth).;
* ============================================================================;
* User-defined formats that map the numeric survey codes to readable labels.
* Format names may not end in a digit, hence the trailing underscore on
* standard01_ and reason2_;
proc format;
    value standard01_
        0 = 'No'
        1 = 'Yes';

    value reason
        1 = 'Required for graduation'
        2 = 'Personal interest'
        3 = 'Improve employment opportunities'
        4 = 'To analyze data for my current research';

    value reason2_
        1 = 'Personal interest'
        2 = 'Improve employment opportunities'
        3 = 'To analyze data for my current research';

    value packages
        1 = 'SAS'
        2 = 'SPSS'
        3 = 'Stata'
        4 = 'R'
        5 = 'Other';

    value genhealth
        1 = 'Excellent'
        2 = 'Very Good'
        3 = 'Good'
        4 = 'Fair'
        5 = 'Poor';

    value seatbelt
        1 = 'Always'
        2 = 'Nearly Always'
        3 = 'Sometimes'
        4 = 'Seldom'
        5 = 'Never';

    value persdoc
        0 = 'No'
        1 = 'Only one'
        2 = 'More than one';

    value race
        1 = 'White'
        2 = 'Black'
        3 = 'AI/AN'
        4 = 'Asian'
        5 = 'PI'
        6 = 'Other';

    value marital
        1 = 'Married'
        2 = 'Divorced'
        3 = 'Widowed'
        4 = 'Separated'
        5 = 'Never Married';

    /* Label for code 1 previously read "Empoyed for wages" (typo) */
    value employ
        1 = 'Employed for wages'
        2 = 'Self-employed'
        3 = 'Out of work >= 1'
        4 = 'Out of work < 1'
        5 = 'Homemaker'
        6 = 'Student'
        7 = 'Retired'
        8 = 'Unable to work';

    value income
        1 = '0-25'
        2 = '25001-50'
        3 = '50001-75'
        4 = '75001-100'
        5 = '100001-125'
        6 = '125001-150'
        7 = '150001-175'
        8 = '175001-200'
        9 = '200001+';

    value month
        1  = 'January'
        2  = 'February'
        3  = 'March'
        4  = 'April'
        5  = 'May'
        6  = 'June'
        7  = 'July'
        8  = 'August'
        9  = 'September'
        10 = 'October'
        11 = 'November'
        12 = 'December';

    value gender
        1 = 'Female'
        2 = 'Male';

    value color
        1  = 'Black'
        2  = 'Blue'
        3  = 'Gray'
        4  = 'Green'
        5  = 'Orange'
        6  = 'Pink'
        7  = 'Purple'
        8  = 'Red'
        9  = 'White'
        10 = 'Yellow';

    value bmi
        1 = 'Underweight'
        2 = 'Healthy weight'
        3 = 'Overweight'
        4 = 'Obese';

    value raceth
        1 = 'White, non-Hispanic'
        2 = 'Black, non-Hispanic'
        3 = 'Hispanic, any race'
        4 = 'Other race, non-Hispanic';
run;
* Task 4.
* Create a new data set named classurv3. Use the RENAME statement to give each
* variable a short (but meaningful) name. For example, "insurance" or "pers_doc."
* Then apply labels to the variable names. Finally, in the same DATA step, apply
* the user-defined formats you created as well as an appropriate SAS format to
* the values of the annual salary variable.;
* ============================================================================;
* Rename variables and Apply Formats;
* http://support.sas.com/documentation/cdl/en/lrcon/62955/HTML/default/
viewer.htm#a000695119.htm;
* ============================================================================;
* Build classurv3: attach labels and formats, then give each question a short
* meaningful name;
data classurv.classurv3;
    set classurv.classurv2;

    /* LABEL and FORMAT refer to the incoming q-names because RENAME is only */
    /* applied as observations are written to the output data set.           */
    label
        q13   = "Any Insurance"
        q14   = "Personal Doctor"
        q17   = "Veteran Status"
        q18   = "Marital Status"
        q20   = "Employment Status"
        q21_1 = "Annual Salary"
        q24   = "Year of Birth"
        q25   = "Month of Birth"
        q26   = "Gender";

    /* q24 (year of birth) is left unformatted */
    format
        q13 q17 standard01_.
        q14     persdoc.
        q18     marital.
        q20     employ.
        q21_1   dollar10.2
        q25     month.
        q26     gender.;

    /* Viewtable shows labels as column headings by default, so the new */
    /* names are easiest to confirm in a PROC PRINT or PROC CONTENTS.   */
    rename
        q13   = ins
        q14   = persdoc
        q17   = veteran
        q18   = marital
        q20   = employ
        q21_1 = salary
        q24   = year
        q25   = month
        q26   = gender;
run;
* List report to confirm the new names, labels, and formats;
title1 "Checking the Edits to the Class Survey Data";
footnote "&sysdate at &systime";
proc print data = classurv.classurv3;
run;
* Task 5.
* Create a new data set named classurv4. Recode the values in the year of birth
* variable to the actual year of birth, as opposed to the coded value for the
* year of birth. (HINT: This should only require one line of code).;
* ============================================================================;
* Shift the coded year by 1949 so that code 1 becomes 1950. A missing code
* stays missing because arithmetic on a missing value yields missing;
data classurv.classurv4;
    set classurv.classurv3;
    year = 1949 + year;
run;
* Can also use the SUM function. SUM ignores missing arguments, so without
* the guard a missing year would come back as 1949 instead of missing;
data classurv.classurv4;
    set classurv.classurv3;
    if not missing(year) then year = sum(year, 1949);
run;
* Can also use a sum statement. Like the SUM function, a sum statement treats
* a missing value as zero, so guard it to keep a missing year missing rather
* than turning it into 1949;
data classurv.classurv4;
    set classurv.classurv3;
    if not missing(year) then year + 1949;
run;
* Task 6.
* Create, view, and compare a list report of classurv4 with classurv3.;
* ============================================================================;
* Titles and footnotes are global, so they are set once and apply to both
* list reports;
title1 "Comparing classurv3 to classurv4";
footnote "&sysdate at &systime";

proc print data = classurv.classurv3;
run;

proc print data = classurv.classurv4;
run;
* Task 7.
* Create a data set named classurv5. In the same DATA step create a new
* calculated variable that contains each student's date of birth. Name the
* variable dob and assume each student was born on the 15th day of the month.
* Additionally, create another calculated variable that contains the age of each
* student (as of their last birthday) at the time of the survey (Assume everyone
* took the survey on January 10, 2014). Name this variable age.;
* ============================================================================;
* Derive date of birth and age at the survey date;
data classurv.classurv5;
    set classurv.classurv4;

    /* Birthday is assumed to fall on the 15th. MDY returns missing when */
    /* month or year is missing, which in turn leaves age missing.       */
    dob = mdy(month, 15, year);

    /* Without a date format dob prints as a raw count of days */
    format dob date9.;

    /* Completed years of age on 10 January 2014. The AGE basis is the */
    /* YRDIF basis intended for computing ages.                        */
    age = int(yrdif(dob, '10JAN2014'd, 'AGE'));
run;
* How many missing observations are there for the variable age?
* Set a title here so the report does not inherit the one from Task 6;
proc print data = classurv.classurv5;
    where age = .;
    title1 "Observations with Missing Age";
    footnote "&sysdate at &systime";
run;
* Task 8.
* Determine the average age of students on the first day of class.;
* ============================================================================;
* Summary statistics for age - the Mean column answers Task 8;
title1 "Average Age of Students on the First Day of Class";
footnote "&sysdate at &systime";
proc means data = classurv.classurv5;
    var age;
run;
* Task 9.
* Create a histogram that depicts the distribution of students’ salaries.;
* ============================================================================;
* Histogram of salary with a density curve drawn over it;
title "Histogram of Salary on First Day of Class";
footnote "&sysdate at &systime";
proc sgplot data = classurv.classurv5;
    histogram salary;
    density   salary;
run;
* Task 10. Determine quartiles of the variable for annual salary.;
* ============================================================================;
* Quartiles of salary. ODS SELECT limits the PROC UNIVARIATE output to the
* Quantiles table;
title1 "Determine Quartiles of Annual Salary";
footnote "&sysdate at &systime";
proc univariate data = classurv.classurv5;
    ods select quantiles;
    var salary;
run;
* Task 11.
* Create a new SAS data set called classurv6. Use conditional
* processing to categorize participants into groups defined by quartiles of
* salary;
* ============================================================================;
* Assign each participant to a salary quartile group. The cut points 2500,
* 11492 and 39130 are hard-coded. Each cut point belongs to the lower group,
* and a missing salary stays missing;
data classurv.classurv6;
    set classurv.classurv5;

    /* The missing check comes first because a missing value compares */
    /* lower than any number and would otherwise land in group 1.     */
    if missing(salary)      then salary4cat = .;
    else if salary <= 2500  then salary4cat = 1;
    else if salary <= 11492 then salary4cat = 2;
    else if salary <= 39130 then salary4cat = 3;
    else                         salary4cat = 4;
run;
* Task 12.
* Create a list report to view the continuous salary variable and the
* categorized salary variable only.;
* ============================================================================;
* Side-by-side listing of salary and its quartile group;
title1 "Comparing Salary and Salary4cat";
footnote "&sysdate at &systime";
proc print data = classurv.classurv6;
    var salary salary4cat;
run;
* Task 13.
* Determine the percentage of participants in each of the salary categories.;
* ============================================================================;
* Two-way table of salary group by gender. NOROW and NOPERCENT suppress the
* row and overall percentages, leaving counts and column percentages;
title1 "Percentage of Students in each Category of Salary";
title2 "By Gender";
footnote "&sysdate at &systime";
proc freq data = classurv.classurv6;
    tables salary4cat * gender / nopercent norow;
run;
* Task 13: Alternative way of categorizing salary and viewing one-way
* frequency report;
proc format;
    /* A lower bound is excluded by writing < after it. A leading > is not */
    /* valid range syntax. Each cut point belongs to the lower quartile    */
    /* group, which matches the coding of salary4cat.                      */
    value salary
        low   -  2500  = '1st Quartile'
        2500  <- 11492 = '2nd Quartile'
        11492 <- 39130 = '3rd Quartile'
        39130 <- high  = '4th Quartile';
run;
* Same cross-tabulation, but the groups come from applying the salary. format
* to the continuous variable for the duration of this step;
title1 "Percentage of Students in each Category of Salary";
title2 "By Gender";
footnote "&sysdate at &systime";
proc freq data = classurv.classurv6;
    format salary salary.;
    tables salary * gender;
run;
* Task 14.
* Using the WHERE statement, create a list report that includes only females
* who are between 22 and 24 years old (inclusive). This list report should
* include only the obs column, and the variables for gender, and age, in the
* given order.;
* ============================================================================;
* List gender and age for females (gender code 1) aged 22 through 24.
* BETWEEN-AND includes both end points;
title1 "List Report of Females who are Between the ages of 22 and 24";
footnote "&sysdate at &systime";
proc print data = classurv.classurv6;
    var gender age;
    where (age between 22 and 24) and gender = 1;
run;
* Task 15.
* Create a new SAS data set called classurv7. In the same DATA step create a
* midterm exam weight variable and a final exam weight variable. For males the
* midterm is worth 40% and the final is worth 60%. For females the midterm is
* worth 60% and the final is worth 40%.;
* ============================================================================;
* Add exam weights that depend on gender. Any other gender value, including
* missing, leaves both weights missing;
data classurv.classurv7;
    set classurv.classurv6;

    select (gender);
        when (1) do;        /* Female: midterm 60 percent, final 40 percent */
            midterm = 0.6;
            final   = 0.4;
        end;
        when (2) do;        /* Male: midterm 40 percent, final 60 percent */
            midterm = 0.4;
            final   = 0.6;
        end;
        otherwise;          /* no assignment */
    end;
run;
* Task 16.
* Create a list report that includes the variables for gender, midterm weight,
* and final weight.;
* ============================================================================;
* List report of gender with the midterm and final exam weights;
title1 "List Report of Exam Weights";
footnote "&sysdate at &systime";
proc print data = classurv.classurv7;
    var gender midterm final;
run;