--- %%NOBANNER%% -->
/*-------------------<-- Start of Description -->--------------------\
| Nonparametric Analysis of 2-Sample means with different variances: |
| Compute p-value and confidence interval of two sample location |
| differences; |
|--------------------<-- End of Description -->----------------------|
|--------------------------------------------------------------------|
|--------------<-- Start of Files or Arguments Needed -->------------|
| The following parameters are needed for the macro: |
| indata = Input dataset; |
| class = Grouping variable (with values 1, 2); |
| trtgrp = Treatment group Indicator: a value to represent |
| treatment; |
| AVAR = Analysis variable; |
| ALPHA = 100*(1-ALPHA)% level for the two-sided confidence |
| interval; default is 0.05; |
| tail = two-sided or one sided confidence interval; default |
| is 2; |
| better = The smaller the better or The bigger the better; |
| Superiority Test or Non-Inferiority Test; |
| default is "The bigger the better"; |
| tiesadj = Ties Adjustment; Default is True. |
| outdata = output data set; |
|---------------<-- End of Files or Arguments Needed -->-------------|
|--------------------------------------------------------------------|
|----------------<-- Start of Example and Usage -->------------------|
| Example: |
| data hw_ex2; |
| input grp :$20. val; |
| cards; |
| HealthGreese 297 |
| HealthGreese 340 |
| HealthGreese 325 |
| HealthGreese 227 |
| HealthGreese 277 |
| HealthGreese 337 |
| HealthGreese 250 |
| HealthGreese 290 |
| Lead-Poisoned 293 |
| Lead-Poisoned 291 |
| Lead-Poisoned 289 |
| Lead-Poisoned 430 |
| Lead-Poisoned 510 |
| Lead-Poisoned 353 |
| Lead-Poisoned 318 |
| ; |
| run; |
| %Flp2mean(indata=hw_ex2, class=grp, avar=val, tail=1,alpha=0.08, |
| trtgrp='Lead-Poisoned', outdata=test); |
| Usage: %Flp2mean(indata=, class=, trtgrp= avar=, alpha=0.05, |
| tail=2, outdata=); |
| Reference: Hollander M and Wolfe DA, "Nonparmetric Statistical |
| Methods," pp ??-??. |
\-------------------<-- End of Example and Usage -->----------------*/
%macro Flp2mean(indata=, class=, trtgrp=, avar=, alpha=0.05, tail=2,
better=BIG, tiesadj=T,outdata=);
/*--------------------------------------------\
| Author: Duo Zhou; |
| Created: 2-17-2002 6:30pm; |
| Purpose: Nonparametric Analysis for 2-sample|
| means with Different Variances; |
\--------------------------------------------*/
%local _grplist _dummy _grpn _i_ _j_ _grpi _varchk _varlen _qpdiff _minimumn _tmplast_;
%let _tmplast_=&syslast;
%if (%index(&trtgrp,%str(%'))) or (%index(&trtgrp,%str(%")))%then %do;
%let trtgrp=%sysfunc(dequote(&trtgrp));
%end;
proc sql noprint;
select count(distinct &class) into: _grpn
from &indata
where &class is not missing;
quit;
%if &_grpn < 2 and &_grpn >=1 %then %do;
%put ==> Alert! In order to compare 2-Sample location problem with unequal variances,;
%put +++ you must provide me with a group variable of 2 groups: &_grpn groups;
%put +++ is not enough for comparison.;
%goto finish;
%end;
%else %if &_grpn > 2 %then %do;
%put ==> Alert! I can only compare Two-Sample location problem with Unequal Variances,;
%put +++ you must provide me with a group variable of 2 groups: I don%str(%')t ;
%put +++ know how to deal with &_grpn groups.;
%goto finish;
%end;
%let _minimumn=0;
proc sql noprint;
select min(totobs) into: _minimumn
from (select count(*) as totobs
from &indata
group by &class);
quit;
%if &_minimumn<10 %then %do;
%put ==> Note: Sample size is extremely small, do not trust the P-value in the output, which was;
%put +++ calculated using large sample approximation. In order by get a relatively accurate;
%put +++ p-value, please compare the calculated statistics provided in the output with the;
%put +++ table given in book "Nonparmetric Statistical Methods" (Hollander M and Wolfe DA).;
%end;
%else %if &_minimumn<30 %then %do;
%put ==> Note: Sample size is relatively small, P-value given in the output is calcualted using;
%put +++ large sample approximation, so use it carefully. But you can manually compare the;
%put +++ calculated statistics provided in the output with the table given in book;
%put +++ "Nonparmetric Statistical Methods" (Hollander M and Wolfe DA).;
%end;
%else %do;
%put ===> Note: the nonparametric analysis is carried out using large sample approprimation, so;
%put +++ be careful about the P-value given in the output.;
%end;
proc ttest data=&indata;
var &avar;
class &class;
title1 'T-Test of Means between Group 1 - Group 2';
run;
proc npar1way wilcoxon data=&indata;
var &avar;
class &class;
title1 'Wilcoxon Rank-Sum Test of Means between Group 1 - Group 2';
run;
title;
/* Start of Fligner-Policello Procedure*/
proc rank data=&indata out=_tmpr1;
var &avar;
ranks _ranka;
run;
proc contents data=_tmpr1 out=_vchecktmp noprint;
run;
proc sql noprint;
select type, length into: _varchk, :_varlen
from _vchecktmp
where name="&class";
quit;
%if (%quote(&trtgrp) eq) %then %do;
proc sql noprint;
select distinct &class into: _grplist separated by ','
from &indata
where &class is not missing;
quit;
%end;
%else %do;
%let _trtchk=;
proc sql noprint;
select &class into: _trtchk
from &indata
/* &trtgrp refer to a value, but &class is referring to a data variable; */
%if (%quote(&_varchk) eq %quote(1)) %then %do;
where &class = &trtgrp
%end;
%else %do;
where upcase(&class)="%upcase(&trtgrp)"
%end;;
quit;
/* Check the class variable to see if it has a treatment group, otherwise error; */
%if (%quote(&_trtchk) ne) %then %do;
/* The treatment group will be the 2nd group, the control group will be the 1st; */
proc sql noprint;
select distinct &class,
%if (%quote(&_varchk) eq %quote(1)) %then %do; (1+(&class=&trtgrp)) as _cgrp %end;
%else %do; (1+(upcase(&class)="%upcase(&trtgrp)")) as _cgrp %end;
into: _grplist separated by ',',
: _dummy
from _tmpr1
where &class is not missing
order by _cgrp;
quit;
%end;
%else %do;
%put ==> Alert! The class variable &class does not have a treatment group as you specified: &trtgrp;
%put +++ Sorry, I can%str(%')t do any further analysis;
%goto finish;
%end;
%end;
%do _i_=1 %to &_grpn;
%let _grp&_i_=%trim(%left(%qscan(%quote(&_grplist), &_i_, %str(,))));
%end;
%if (%index(%quote(%upcase(&tiesadj)),T)) or (%quote(&tiesadj)=1) %then %do;
proc sql;
create table _ties1 as
select h1.&class, h1.&avar, h1._ranka, h2._ntie2
from _tmpr1 as h1 left join
(select distinct &class, &avar, _ranka, count(*) as _ntie2
from _tmpr1
where &class="&_grp1" and
_ranka in (select distinct _ranka
from _tmpr1
where &class="&_grp2")
group by _ranka) as h2
on h1.&class ne h2.&class and h1._ranka=h2._ranka;
create table _tmpr1 as
select h1.&class, h1.&avar, h1._ranka, h1._ntie2, h2._ntie1
from _ties1 as h1 left join
(select distinct &class, &avar, _ranka, count(*) as _ntie1
from _ties1
where &class="&_grp2" and
_ranka in (select distinct _ranka
from _ties1
where &class="&_grp1")
group by _ranka) as h2
on h1.&class ne h2.&class and h1._ranka=h2._ranka
order by h1._ranka, h1.&class;
quit;
%end;
%else %do;
proc sort data=_tmpr1;
by _ranka &class;
run;
%end;
data _tmp1(keep=&class &avar _ranka %do _i_=1 %to &_grpn; _p&_i_ %end;)
%do _i_=1 %to &_grpn;
_grptmp&_i_(keep=&class &avar _ranka _p&_i_)
%end;;
set _tmpr1;
by _ranka &class;
retain %do _i_=1 %to &_grpn; _x&_i_ _tmpx&_i_ _p&_i_ %end; 0;
%do _j_=1 %to &_grpn;
%let _jc_=%eval(%eval(&_grpn-&_j_)+1);
%if &_varchk=1 %then %do;
if &class=&&_grp&_j_ then do;
%end;
%else %do;
if &class="&&_grp&_j_" then do;
%end;
%if (%index(%quote(%upcase(&tiesadj)),T)) or (%quote(&tiesadj)=1) %then %do;
_tmpx&_j_=_tmpx&_j_+1;
if first.&class then do;
if _ntie&_j_ ne . then do;
_x&_jc_=_x&_jc_+_ntie&_j_/2;
end;
else do;
_x&_j_=_tmpx&_j_;
_x&_jc_=_tmpx&_jc_;
end;
end;
%end;
%else %do;
_x&_j_=_x&_j_+1;
%end;
_p&_j_=_x&_jc_;
_p&_jc_=0;
output _grptmp&_j_;
end;
label _p&_j_="Number of Sample %upcase(&&_grp&_jc_) Observations Less Then %upcase(&&_grp&_j_)";
%end;
output _tmp1;
run;
%do _i_=1 %to &_grpn;
proc means data=_grptmp&_i_ noprint ;
class &class;
var _p&_i_;
output out=_tmp1out&_i_ N=_n sum=_sum mean=_mean std=_std;
run;
%end;
proc sql noprint;
create table _tmp1out as
select t1.&class as _group1,
(t1._n-1)*t1._std*t1._std as _v1,
(t1._n*t1._mean) as _psum,
(t1._mean) as _pbar,
t2.&class as _group2,
(t2._n-1)*t2._std*t2._std as _v2,
(t2._n*t2._mean) as _qsum,
(t2._mean) as _qbar
from _tmp1out1 as t1, _tmp1out2 as t2
where t1.&class is not missing and t2.&class is not missing;
select (t2._n*t2._mean) - (t1._n*t1._mean) into: _qpdiff
from _tmp1out1 as t1, _tmp1out2 as t2
where t1.&class is not missing and t2.&class is not missing;
quit;
data _tmp2out;
set _tmp1out;
file print;
_uhead=(_qsum-_psum)/(2*sqrt(_v1+_v2+_pbar*_qbar));
put / +5 'Hepothesis Test For Two-Sample Location Differences With Unequal Variances using'
/ +5 'large sample approximation and Ties Adjustment with Fligner-Policello Procedure'
/ +5 '(Reference: Hollander M and Wolfe DA,"Nonparametric Statistical Methods," pp ??-??)';
/* 2, q - the Treatment group */
%if (%quote(&tail) eq %quote(2)) or (%index(%quote(%upcase(&tail)),TWO)) %then %do;
_za=probit(1-&alpha/2);
_pvalue=(1-probnorm(abs(_uhead)))*2;
_lcl=(_qsum-_psum)-_za*2*sqrt(_v1+_v2+_pbar*_qbar);
_ucl=(_qsum-_psum)+_za*2*sqrt(_v1+_v2+_pbar*_qbar);
put / +5 %if &_varchk=1 %then %do; "Two Sided Test: H0: P(Group &_grp2-&_grp1>0)=1/2 vs"
/ +5 " H1: P(Group &_grp2-&_grp1>0)<>1/2;" %end;
%else %do; "Two Sided Test: H0: P(Group '&_grp2'-'&_grp1'>0)=1/2 vs"
/ +5 " H1: P(Group '&_grp2'-'&_grp1'>0)<>1/2;" %end;
put / +10 "The Fligner-Policello Statistics = " _uhead;
put / +10 "%sysevalf((1-&alpha)*100)% confidence bound" @42 "= (" _lcl +(-1) ", " +(-1) _ucl ")";
format _lcl _ucl 10.6; keep _lcl _ucl;
label _lcl="Lower Confidence Bound" _ucl="Upper Confidence Bound";
%end;
%else %if (%quote(&tail) eq %quote(1)) or (%index(%upcase(&tail),ONE)) %then %do;
_za=probit(1-&alpha);
%if (%index(%quote(%upcase(&better)), BIG)) %then %do;
_pvalue=(1-probnorm(_uhead));
_lcl=(_qsum-_psum)-_za*2*sqrt(_v1+_v2+_pbar*_qbar);
put / +5 %if &_varchk=1 %then %do; "One Sided Test: H0: P(Group &_grp2-&_grp1>0)<=1/2 vs"
/ +5 " H1: P(Group &_grp2-&_grp1>0)> 1/2;"; %end;
%else %do; "One Sided Test: H0: P(Group '&_grp2'-'&_grp1'>0)<=1/2 vs"
/ +5 " H1: P(Group '&_grp2'-'&_grp1'>0)> 1/2;"; %end;
put / +10 "The Fligner-Policello Statistics = " _uhead;
put / +10 "%sysevalf((1-&alpha)*100)% confidence bound" @44 "= (" _lcl +(-1) ", +Inf)";
format _lcl 10.6; keep _lcl; label _lcl="Lower Confidence Bound";
%end;
%else %if (%index(%quote(%upcase(&better)), SMALL)) %then %do;
_pvalue=probnorm(_uhead);
_ucl=(_qsum-_psum)+_za*2*sqrt(_v1+_v2+_pbar*_qbar);
put / +5 %if &_varchk=1 %then %do; "One Sided Test: H0: P(Group &_grp2-&_grp1>0)>=1/2 vs"
/ +5 " H1: P(Group &_grp2-&_grp1>0)< 1/2;"; %end;
%else %do; "One Sided Test: H0: P(Group '&_grp2'-'&_grp1'>0)>=1/2 vs"
/ +5 " H1: P(Group '&_grp2'-'&_grp1'>0)< 1/2;"; %end;
put / +10 "The Fligner-Policello Statistics = " _uhead;
put / +10 "%sysevalf((1-&alpha)*100)% confidence bound" @44 "= (-Inf, " _ucl +(-1) ")";
format _ucl 10.6; keep _ucl; label _ucl="Upper Confidence Bound";
%end;
%end;
put / +10 "P-value" @44 "= " _pvalue;
format _uhead _pvalue 10.6; keep _uhead _pvalue;
label _uhead="Fligner-Policello Statistics" _pvalue="P-value";
run;
%if (%quote(&outdata) ne) %then %do;
data &outdata;
set _tmp2out;
run;
%end;
%else %let syslast=&_tmplast_;
proc datasets library=work nolist;
delete _ties1 _tmp1 _tmpr1 _grptmp1 - _grptmp%trim(%left(&_grpn))
_tmp1out _tmp1out1 - _tmp1out%trim(%left(&_grpn));
run;quit;
%finish:
%mend Flp2mean;