-
Notifications
You must be signed in to change notification settings - Fork 0
/
Effectively Presenting Results Key.sas
253 lines (219 loc) · 7.93 KB
/
Effectively Presenting Results Key.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
* ============================================================================;
* Lab: Effectively Presenting Your Results
* This code is posted for your benefit. However, I highly recommend that you
* practice typing your own SAS programs as well. With the SAS programming
* language, as with all new languages, immersion seems to be the best way to
* learn.
* ============================================================================;
proc format;
value gender 1 = 'Female'
2 = 'Male';
value agegroup 1 = "20-24"
2 = "25-30"
3 = "30+";
value race_eth 1 = "White, non-Hispanic"
2 = "Black, non-Hispanic"
3 = "Hispanic"
4 = "Asian"
5 = "Other Race, non-Hispanic";
value married 0 = "Not Currently Married"
1 = "Currently Married";
run;
* Task 1.
* Use SAS to determine the N and Percent of students in each age group,
* race/ethnicity, and marital status within levels of gender. Create a
* demographics table (usually table 1 in publications) that compares the above
* characteristics of male and female students. Create the table described above
* in Microsoft Word and save it as a .docx file. Your table should include an
* appropriate title and be formatted for publication.
* Note: The variable age is calculated from the variables month and year. You
* should assume that each person was born on the 15th of the month, and we
* want their age as of the first day of class (Assume everyone took the survey
* on January 10, 2014). The categories for age are 20-24, 25-29, and 30+. The
* variable race_eth should include the following categories: non-Hispanic white,
* non-Hispanic black, Hispanic, and Asian non-Hispanic. Finally,
* marital status should include the following categories: currently married,
* not currently married.
* ============================================================================;
proc import
out = class
datafile = "C:\Users\Mbc0022\Desktop\Class Survey 5141.csv"
dbms = csv replace;
datarow = 3;
run;
data class2
(keep = marital hispanic race birth_year birth_month gender age agegroup
race_eth married);
set class
(rename = (q18 = marital
q15 = hispanic
q16 = race
q24 = birth_year
q25 = birth_month
q26 = gender));
/* Create age variable */
birth_year = birth_year + 1949;
dob = mdy(birth_month, 15, birth_year);
age = int(yrdif(dob, '10JAN2014'd, 'Actual'));
/* Create age groups variable */
if missing(age) then agegroup = .;
else if 20 <= age < 25 then agegroup = 1;
else if 25 <= age < 30 then agegroup = 2;
else agegroup = 3;
/* Create race / ethnicity variable */
if hispanic = 9 then hispanic = .;
if race = 9 then race = .;
if missing(hispanic) | missing(race) then race_eth = .;
else if hispanic = 1 then race_eth = 3;
else if race = 1 then race_eth = 1;
else if race = 2 then race_eth = 2;
else if race = 4 then race_eth = 4;
else race_eth = 5;
/* Create married variable */
if missing(marital) then married = .;
else if marital = 1 then married = 1;
else married = 0;
run;
* Group n's;
proc freq data = class2;
tables gender;
format gender gender.;
run;
* First attempt;
proc freq data = class2;
title1 "5313 Lab Task 1";
title2 "Demographics for Table 1";
table gender * (agegroup race_eth married);
format gender gender.
agegroup agegroup.
race_eth race_eth.
married married.;
run;
* Second attempt - better;
ods rtf file = 'C:\Users\mbc0022\Desktop\module_9_lab_task_1.rtf';
proc freq data = class2;
title1 "5313 Lab Task 1";
title2 "Demographics for Table 1";
table (agegroup race_eth married) * gender / nopercent norow;
format gender gender.
agegroup agegroup.
race_eth race_eth.
married married.;
run;
ods rtf close;
* Third attempt - even better (but more programming);
* Run everything down to "ods rtf close";
title;
proc freq data = class2;
table agegroup * gender / out = agegroup outpct;
format gender gender.
agegroup agegroup.;
run;
proc freq data = class2;
table race_eth * gender / out = race_eth outpct;
format gender gender.
race_eth race_eth.;
run;
proc freq data = class2;
table married * gender / out = married outpct;
format gender gender.
married married.;
run;
data table1 (keep = characteristic gender n_percent);
length characteristic $21;
set agegroup race_eth married;
if pct_col = . then delete;
if agegroup ~= . then characteristic = put(agegroup, agegroup.);
if race_eth ~= . then characteristic = put(race_eth, race_eth.);
if married ~= . then characteristic = put(married, married.);
n_percent = cats(count,
' (',
put(pct_col, 4.1),
')');
label n_percent = "n (%)";
run;
proc sort data = table1;
by gender;
run;
ods rtf file = 'C:\Users\mbc0022\Desktop\module_9_lab_task_1.rtf';
proc print data = table1 label;
title1 "5313 Lab Task 1";
title2 "Demographics for Table 1";
run;
ods rtf close;
* Task 2.
* Create a bar chart that displays the percent of males and females in each
* age group. Create the chart described above in Microsoft Excel or in SAS.
* Paste it into the .docx file you created in Task 1, and save again. Your
* chart should include an appropriate title, axis labels, and be formatted for
* publication.
* ============================================================================;
proc freq data = class2;
title1 "5313 Lab";
title2 "Frequency Table for Task 2";
table gender * agegroup / out = freq outpct;
format gender gender.
agegroup agegroup.;
run;
* Output to Excel;
* In this DATA step we convert gender and agegroup to character variables.
* The new values for these character variables are the character strings from
* our formats. In other words, instead of having formatted numeric variables,
* we will now have character variables that look like our formats. This is to
* help with readability after we export the data to Excel.
* ============================================================================;
data freq;
format gender_c agegroup_c;
set freq;
gender_c = put(gender, gender.);
agegroup_c = put(agegroup, agegroup.);
run;
proc export
data = freq
dbms = xlsx replace
outfile = "C:\Users\mbc0022\Desktop\lab_task2.xlsx"
replace;
run;
* Now use the Excel file to make a bar graph in Excel;
* Task 3
* Create a line chart that compares the average monthly temperatures for 1995
* and 2012. Months should be on the x-axis and average temperature should be
* on the y-axis. The line graph should have two lines: one for 1995 and one
* for 2012. Include a graph title, a label for each axis, and format the
* graph so that it would be publication ready. Create the chart described
* above in Microsoft Excel or in SAS. Paste it into the .docx file you created
* in Task 1, and save again.
* ============================================================================;
data temps_1995 (keep = sort_order month average1995);
set "C:\Users\Mbc0022\Desktop\dfwtemps1995.sas7bdat";
array temps {31} _numeric_;
do i = 1 to 31;
if temps{i} = -99 then temps{i} = .;
end;
average1995 = round(mean(of temp_f1-temp_f31),.1);
sort_order = _N_;
run;
data temps_2012 (keep = sort_order month average2012);
set "C:\Users\Mbc0022\Desktop\dfwtemps2012.sas7bdat";
array temps {31} _numeric_;
do i = 1 to 31;
if temps{i} = -999 then temps{i} = .;
end;
average2012 = round(mean(of temp_f1-temp_f31),.1);
sort_order = _N_;
run;
data temps_both (drop = sort_order);
merge temps_1995 temps_2012;
by sort_order;
run;
proc print data = temps_both;
title1 "5313 Lab Task 3";
title2 "Average Temperatures by Month";
run;
proc export
data = temps_both
dbms = xlsx replace
outfile = "C:\Users\mbc0022\Desktop\lab_task3.xlsx"
replace;
run;
* Now use the Excel file to make a line graph in Excel;