Skip to content

Commit

Permalink
feat(qualify, qualify_multi): 参数 GROUPBY 支持指定输出格式 (#71)
Browse files Browse the repository at this point in the history
  • Loading branch information
Snoopy1866 authored Jan 16, 2025
1 parent c1bba11 commit 800c209
Show file tree
Hide file tree
Showing 6 changed files with 251 additions and 139 deletions.
25 changes: 10 additions & 15 deletions docs/qualify/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,25 +98,20 @@ VAR = SEX

默认情况下,各分类按照频数从大到小排列,频数较大的分类将显示在输出数据集中靠前的位置。

当指定一个输出格式作为排序依据时,该输出格式应当使用 `VALUE` 语句生成,例如:

```sas
proc format;
value sexn
1 = "男"
2 = "女";
run;
```

宏程序将根据格式化之前的数值对各分类进行排序。

> [!IMPORTANT]
>
> - 若参数 `BY` 指定了基于某个输出格式进行排序,则该格式必须是 CATALOG-BASED,即在 `DICTIONARY.FORMATS` 表中,变量 `source` 的值应当是 `C`
> [!TIP]
> - 当指定一个输出格式作为排序依据时,该输出格式应当使用 `VALUE` 语句生成,例如:
>
> ```sas
> proc format;
> value sexn
> 1 = "男"
> 2 = "女";
> run;
> ```
>
> - 如果需要按照参数 `VAR` 自身的值进行排序,可以指定 `BY = %nrstr(&VAR)`
> 宏程序将根据格式化之前的数值对各分类进行排序
**Usage** :
Expand Down
27 changes: 19 additions & 8 deletions docs/qualify_multi/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,26 +70,37 @@ GROUP = ARM("试验组", "对照组")

### GROUPBY

**Syntax** : _variable_<(ASC\<ENDING\> | DESC\<ENDING\>)>
**Syntax** :

- _variable_<(ASC\<ENDING\> | DESC\<ENDING\>)>
- _format_<(ASC\<ENDING\> | DESC\<ENDING\>)>

指定分组变量的排序变量及排序方向
指定各分组在输出数据集中的排列顺序依据

**Default** : #AUTO

默认情况下,各个分组的输出结果根据分组水平名称在当前语言环境下的默认排列顺序排序(例如:gbk 环境下,按照水平名称的汉语拼音顺序)

> [!WARNING]
> [!IMPORTANT]
>
> - 参数 `GROUPBY` 指定的排序变量必须存在于参数 `INDATA` 指定的数据集中;
> [!NOTE]
> - 若参数 `GROUPBY` 指定了基于某个输出格式进行排序,则该格式必须是 CATALOG-BASED,即在 `DICTIONARY.FORMATS` 表中,变量 `source` 的值应当是 `C`
> - 当指定一个输出格式作为排序依据时,该输出格式应当使用 `VALUE` 语句生成,例如:
>
> ```sas
> proc format;
> value armn
> 1 = "试验组"
> 2 = "对照组";
> run;
> ```
>
> - 若参数 `GROUP` 指定了分组变量的各水平名称,则各分组的统计结果将按照参数 `GROUP` 中各水平名称的指定顺序显示在输出数据集中,此时参数 `GROUPBY` 将被忽略。
> 宏程序将根据格式化之前的数值对各分组进行排序。
**Usage** :
```sas
GROUPBY = ARMN
GROUPBY = ARMN(desc)
GROUPBY = ARMN.
```
[**Example**](#指定分组变量的排序变量)
Expand Down
25 changes: 15 additions & 10 deletions gbk/qualify.sas
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Version Date: 2023-03-08 1.0.1
2025-01-09 1.0.27
2025-01-14 1.1.0
2025-01-15 1.1.1
2025-01-16 1.1.2
===================================
*/

Expand Down Expand Up @@ -137,8 +138,7 @@ Version Date: 2023-03-08 1.0.1
select count(*) into : nobs from &indata;
quit;
%if &nobs = 0 %then %do;
%put ERROR: 分析数据集 &indata 为空!;
%goto exit_with_error;
%put NOTE: 分析数据集 &indata 为空!;
%end;
%end;
%end;
Expand Down Expand Up @@ -725,26 +725,29 @@ Version Date: 2023-03-08 1.0.1

/*汇总*/
proc sql noprint;
select count(*) into :total_n from tmp_qualify_indata_unique_total;
create table tmp_qualify_outdata_label as
select
distinct
0 as IDT,
0 as SEQ,
%unquote(%superq(label_sql_expr)) as ITEM,
%if &total = TRUE %then %do;
/*频数*/
(select sum(&var_name in (%do i = 1 %to &var_level_n; &&var_level_&i %end;)) from tmp_qualify_indata_unique_total)
coalesce(sum(&var_name in (%do i = 1 %to &var_level_n; &&var_level_&i %end;)), 0)
as FREQ,
strip(put(calculated FREQ, &FREQ_format)) as FREQ_FMT,
/*频数-兼容旧版本*/
calculated FREQ as N,
calculated FREQ_FMT as N_FMT,
/*频次*/
(select sum(&var_name in (%do i = 1 %to &var_level_n; &&var_level_&i %end;)) from tmp_qualify_indata)
coalesce((select sum(&var_name in (%do i = 1 %to &var_level_n; &&var_level_&i %end;)) from tmp_qualify_indata), 0)
as TIMES,
strip(put(calculated TIMES, &TIMES_format)) as TIMES_FMT,
/*频率*/
1 as RATE,
strip(put(1, &RATE_format)) as RATE_FMT,
ifn(&total_n = 0, ., 1) as RATE,
ifc(not missing(calculated RATE), strip(put(1, &RATE_format)), "-")
as RATE_FMT,
%do j = 1 %to &stat_n;
%temp_combpl_hash("&&string_&j") || strip(calculated &&stat_&j.._FMT) ||
%end;
Expand All @@ -761,7 +764,7 @@ Version Date: 2023-03-08 1.0.1
"" as RATE_FMT,
"" as VALUE
%end;
from tmp_qualify_indata_unique_total(firstobs = 1 obs = 1);
from tmp_qualify_indata_unique_total;
quit;

%do i = 1 %to &var_level_n;
Expand All @@ -773,17 +776,19 @@ Version Date: 2023-03-08 1.0.1
%unquote(%superq(indent_sql_expr)) || %unquote(&&var_level_note_&i) || %unquote(%superq(suffix_sql_expr))
as ITEM,
/*频数*/
sum(&var_name = &&var_level_&i) as FREQ,
coalesce(sum(&var_name = &&var_level_&i), 0) as FREQ,
strip(put(calculated FREQ, &FREQ_format)) as FREQ_FMT,
/*频数-兼容旧版本*/
calculated FREQ as N,
calculated FREQ_FMT as N_FMT,
/*频次*/
(select sum(&var_name = &&var_level_&i) from tmp_qualify_indata) as TIMES,
coalesce((select sum(&var_name = &&var_level_&i) from tmp_qualify_indata), 0)
as TIMES,
strip(put(calculated TIMES, &TIMES_format)) as TIMES_FMT,
/*频率*/
calculated N/count(*) as RATE,
strip(put(calculated RATE, &RATE_format)) as RATE_FMT,
ifc(not missing(calculated RATE), strip(put(calculated RATE, &RATE_format)), "-")
as RATE_FMT,
%do j = 1 %to &stat_n;
%temp_combpl_hash("&&string_&j") || strip(calculated &&stat_&j.._FMT) ||
%end;
Expand Down
144 changes: 96 additions & 48 deletions gbk/qualify_multi.sas
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Version Date: 2023-12-26 0.1
2024-07-15 0.10
2024-11-14 0.11
2025-01-14 0.12
2025-01-15 0.13
===================================
*/

Expand Down Expand Up @@ -129,8 +130,7 @@ Version Date: 2023-12-26 0.1
select count(*) into : nobs from &indata;
quit;
%if &nobs = 0 %then %do;
%put ERROR: 分析数据集 &indata 为空!;
%goto exit_with_error;
%put NOTE: 分析数据集 &indata 为空!;
%end;
%end;
%end;
Expand Down Expand Up @@ -190,65 +190,112 @@ Version Date: 2023-12-26 0.1
%end;
/*GROUPBY*/
%if &IS_GROUP_LEVEL_SPECIFIED = TRUE %then %do;
%if %superq(groupby) ^= %bquote() and %superq(groupby) ^= #AUTO %then %do;
%put WARNING: 已通过参数 GROUP 指定了分组的排序,参数 GROUPBY 已被忽略!;
%end;
%if %superq(groupby) = %bquote() %then %do;
%put ERROR: 参数 GROUPBY 为空!;
%goto exit_with_error;
%end;
%else %do;
%if %superq(groupby) = %bquote() %then %do;
%put ERROR: 未指定分组排序变量!;
%goto exit_with_error;
%end;
%else %if %superq(groupby) = #AUTO %then %do;
%else %if %superq(groupby) = #AUTO %then %do;
%put NOTE: 未指定分组的排序方式,将按照分组变量自身的值升序排列!;
%let groupby = &group_var(desc);
%end;
/*解析参数 by, 检查合法性*/
%let reg_groupby_id = %sysfunc(prxparse(%bquote(/^(?:([A-Za-z_][A-Za-z_\d]*)|(?:([A-Za-z_]+(?:\d+[A-Za-z_]+)?)\.))(?:\(\s*((?:DESC|ASC)(?:ENDING)?)\s*\))?$/)));
%if %sysfunc(prxmatch(&reg_groupby_id, %superq(groupby))) %then %do;
%let groupby_var = %sysfunc(prxposn(&reg_groupby_id, 1, %superq(groupby)));
%let groupby_fmt = %sysfunc(prxposn(&reg_groupby_id, 2, %superq(groupby)));
%let groupby_direction = %sysfunc(prxposn(&reg_groupby_id, 3, %superq(groupby)));
%if %bquote(&groupby_var) ^= %bquote() %then %do;
/*检查排序变量存在性*/
proc sql noprint;
create table tmp_qualify_m_groupby_sorted as select * from %superq(indata) where not missing(&group_var);
select type into :type from DICTIONARY.COLUMNS where libname = "&libname_in" and memname = "&memname_in" and upcase(name) = "&groupby_var";
quit;
%if &SQLOBS = 0 %then %do;
%put ERROR: 在 &libname_in..&memname_in 中没有找到分组排序变量 &groupby_var;
%goto exit_with_error;
%end;
%end;
%else %do;
%let reg_groupby_id = %sysfunc(prxparse(%bquote(/^([A-Za-z_][A-Za-z_\d]*)(?:\(((?:ASC|DESC)(?:ENDING)?)\))?$/)));
%if %sysfunc(prxmatch(&reg_groupby_id, %superq(groupby))) %then %do;
%let groupby_var = %sysfunc(prxposn(&reg_groupby_id, 1, %superq(groupby)));
%let groupby_direction = %sysfunc(prxposn(&reg_groupby_id, 2, %superq(groupby)));
/*检查排序变量存在性*/
proc sql noprint;
select type into :type from DICTIONARY.COLUMNS where libname = "&libname_in" and memname = "&memname_in" and upcase(name) = "&groupby_var";
quit;
%if &SQLOBS = 0 %then %do; /*数据集中没有找到变量*/
%put ERROR: 在 &libname_in..&memname_in 中没有找到分组排序变量 &groupby_var;
%goto exit_with_error;
%end;
proc sql noprint;
create table tmp_qualify_m_groupby_sorted as
select
distinct
&group_var,
&groupby_var
from %superq(indata) where not missing(&group_var) order by &groupby_var &groupby_direction, &group_var;
quit;
%if %bquote(&groupby_fmt) ^= %bquote() %then %do;
/*检查排序格式存在性*/
proc sql noprint;
select libname, memname, source into :groupby_fmt_libname, :groupby_fmt_memname, :groupby_fmt_source from DICTIONARY.FORMATS where fmtname = "&groupby_fmt";
quit;
%if &SQLOBS = 0 %then %do;
%put ERROR: 参数 BY 指定的排序格式 &groupby_fmt.. 不存在!;
%goto exit_with_error;
%end;
%else %do;
%put ERROR: 参数 GROUPBY 必须指定一个合法的变量名!;
%goto exit_with_error;
%if &groupby_fmt_source ^= C %then %do;
%put ERROR: 参数 BY 指定的排序格式 &groupby_fmt.. 不是 CATALOG-BASED!;
%goto exit_with_error;
%end;
%end;
%end;
/*创建宏变量,用于输出数据集的变量标签*/
/*检查排序方向*/
%if %bquote(&groupby_direction) = %bquote() %then %do;
%put NOTE: 未指定分组的排序方向,默认升序排列!;
%let groupby_direction = ASCENDING;
%end;
%else %if %bquote(&groupby_direction) = ASC %then %do;
%let groupby_direction = ASCENDING;
%end;
%else %if %bquote(&groupby_direction) = DESC %then %do;
%let groupby_direction = DESCENDING;
%end;
%end;
%else %do;
%put ERROR: 参数 GROUPBY = %bquote(&groupby) 格式不正确!;
%goto exit_with_error;
%end;
%if %bquote(&groupby_var) ^= %bquote() %then %do;
proc sql noprint;
select quote(strip(&group_var)) into : group_level_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(&group_var) || '(频数)') into : group_level_freq_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(&group_var) || '(频数格式化)') into : group_level_freq_fmt_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(&group_var) || '(频数)(兼容)') into : group_level_n_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(&group_var) || '(频数格式化)(兼容)') into : group_level_n_fmt_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(&group_var) || '(频次)') into : group_level_times_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(&group_var) || '(频次格式化)') into : group_level_times_fmt_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(&group_var) || '(频率)') into : group_level_rate_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(&group_var) || '(频率格式化)') into : group_level_rate_fmt_1- from tmp_qualify_m_groupby_sorted;
select count(distinct &group_var) into : group_level_n from tmp_qualify_m_groupby_sorted;
create table tmp_qualify_m_groupby_sorted as
select
distinct
&group_var as group_level,
&groupby_var as group_level_by_criteria
from %superq(indata) where not missing(&group_var) order by &groupby_var &groupby_direction, &group_var;
quit;
%end;
%else %if %bquote(&groupby_fmt) ^= %bquote() %then %do;
proc format library = &groupby_fmt_libname..&groupby_fmt_memname cntlout = tmp_qualify_m_groupby_fmt;
select &groupby_fmt;
run;
proc sql noprint;
create table tmp_qualify_m_groupby_sorted(where = (not missing(group_level))) as
select
distinct
coalescec(a.&group_var, b.label) as group_level,
ifn(not missing(b.label), input(strip(b.start), 8.), constant('BIG'))
as group_level_by_criteria,
ifc(missing(b.label), 'Y', '') as group_level_fmt_not_defined
from %superq(indata) as a full join tmp_qualify_m_groupby_fmt as b on a.&group_var = b.label
order by group_level_by_criteria &groupby_direction, group_level ascending;
select sum(group_level_fmt_not_defined = "Y") into : groupby_fmt_not_defined_n trimmed from tmp_qualify_m_groupby_sorted where not missing(group_level);
%if &groupby_fmt_not_defined_n > 0 %then %do;
%put WARNING: 指定用于分组排序的输出格式中,存在 &groupby_fmt_not_defined_n 个分类名称未定义,输出结果可能是非预期的!;
%end;
quit;
%end;
/*创建宏变量,用于输出数据集的变量标签*/
proc sql noprint;
select quote(strip(group_level)) into : group_level_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(group_level) || '(频数)') into : group_level_freq_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(group_level) || '(频数格式化)') into : group_level_freq_fmt_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(group_level) || '(频数)(兼容)') into : group_level_n_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(group_level) || '(频数格式化)(兼容)') into : group_level_n_fmt_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(group_level) || '(频次)') into : group_level_times_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(group_level) || '(频次格式化)') into : group_level_times_fmt_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(group_level) || '(频率)') into : group_level_rate_1- from tmp_qualify_m_groupby_sorted;
select quote(strip(group_level) || '(频率格式化)') into : group_level_rate_fmt_1- from tmp_qualify_m_groupby_sorted;
select count(distinct group_level) into : group_level_n from tmp_qualify_m_groupby_sorted;
quit;
/*OUTDATA*/
Expand Down Expand Up @@ -435,6 +482,7 @@ Version Date: 2023-12-26 0.1
proc datasets noprint nowarn;
delete tmp_qualify_m_indata
tmp_qualify_m_outdata
tmp_qualify_m_groupby_fmt
tmp_qualify_m_groupby_sorted
tmp_qualify_m_res_sum
%do i = 1 %to &group_level_n;
Expand Down
Loading

0 comments on commit 800c209

Please sign in to comment.