Options nocenter linesize=81 pagesize=66; /* THIS PROGRAM PERFORMS A RANDOM-INTERCEPTS BINARY REGRESSION ANALYSIS */ /* (either logistic or probit) */ /* AS DISCUSSED IN HEDEKER & GIBBONS (1994) "A RANDOM-EFFECTS ORDINAL */ /* REGRESSION MODEL FOR MULTILEVEL DATA" BIOMETRICS 50:933-944 */ /* (THE ARTICLE INCLUDES DISCUSSION OF MORE GENERAL MODELS AS WELL) */ /* THIS MACRO PROGRAM WAS ORIGINALLY WRITTEN IN SPSS AND GAUSS */ /* BY DON HEDEKER AND THEN TRANSLATED TO SAS IML BY SIU CHI WONG */ /* IT USES MAXIMUM MARGINAL LIKELIHOOD ESTIMATION */ /* ANY COMMENTS OR QUESTIONS CAN BE SENT TO DON HEDEKER hedeker@uic.edu */ /* PLEASE FEEL FREE TO REDISTRIBUTE THIS MACRO */ /* */ /* THIS MACRO PROGRAM IS A BIT SLOW */ /* THE PROBLEM BELOW TOOK SEVERAL MINUTES TO RUN ON A PENTIUM COMPUTER */ /* ALTERNATIVELY, USING THE STAND-ALONE MIXOR PROGRAM */ /* THE SAME PROBLEM TOOK SECONDS */ /* */ /* */ /* */ OPTIONS NOCENTER LINESIZE=81 PAGESIZE=66; TITLE1 'NIMH Schiz Data - mixed-effects binary regression'; DATA ONE; INFILE 'C:\data\schizb1.dat' ; input SUBJID IMPS79B INT SQRTWK DRUG DRUGSWK; LABEL IMPS79B= 'Severity of Illness'; PROC FORMAT; VALUE IMPS79B 0='Well' 1='Ill'; VALUE DRUG 0='Placebo' 1='Drug'; /*** for the Random-effects program the data ***/ /*** needs to be sorted by the cluster ID ***/ /*** (in the longitudinal setting this is the ***/ /*** subject ID) ***/ /*** and observations with missing data on either ***/ /*** the dependent variable, the cluster ID ***/ /*** or any of the independent variables ***/ /*** need to be excluded ***/ PROC SORT DATA=ONE; BY SUBJID; DATA NOMISS; SET ONE; MISSVAR = 0; IF ((SUBJID=.) OR (IMPS79B=.) OR (SQRTWK=.) OR (DRUG=.) OR (DRUGSWK=.)) THEN MISSVAR = 1; IF (MISSVAR = 0) THEN OUTPUT; PROC MEANS DATA=NOMISS; VAR SUBJID IMPS79B INT SQRTWK DRUG DRUGSWK; /*** 2-LEVEL RANDOM-INTERCEPT BINARY (probit or logistic) REGRESSION ***/ /*** ***/ /*** N = number of clusters ***/ /*** NTOT = number of total observations ***/ /*** NI = vector of size N with number of observations per cluster ***/ /*** CODE = numeric codes for the two outcome categories ***/ /*** (listed in the proper order) ***/ /*** NOTE THAT THE ONLY LINES BELOW THAT NEED TO BE CHANGED ARE ***/ /*** ID = MIXDAT[,1]; ***/ /*** (list cluster ID variable) ***/ /*** X = MIXDAT[,2:4]; ***/ /*** (list the P independent variables) ***/ /*** Y = MIXDAT[,5]; ***/ /*** (list the dependent variable for the analysis) ***/ /*** VARNAMES = {'Group' 'Time' 'Grp*Time'}; ***/ /*** (list 8-character variable names for each independent variable) ***/ /*** DEPNAME = 'Clarity'; ***/ /*** (list an 8-character variable name for the dependent variable) ***/ /*** CODE = {0, 1}; ***/ /*** (list the values of the binary dependent variable) ***/ /*** RESPFN = 1; ***/ /*** (desired response function: 0=probit, 1=logistic) ***/ /*** CONVERGE = 0.0001; ***/ /*** (give the convergence criterion ***/ /*** by default the convergence criterion is set below to .0001 ***/ /*** if a more precise solution is desired ***/ /*** change this number to a smaller number) ***/ /*** ***/ PROC IML; reset noname; USE NOMISS VAR{ SUBJID SQRTWK DRUG DRUGSWK IMPS79B}; READ ALL INTO MIXDAT; ID = MIXDAT[,1]; X = MIXDAT[,2:4]; Y = MIXDAT[,5]; VARNAMES = {'SqrtWeek' 'Drug' 'Drug*Swk'}; DEPNAME = 'Severity'; CODE = {0, 1}; CONVERGE = 0.0001; RESPFN = 1; MUNAME = 'Intercpt'; VNAME = 'Clust SD'; P = NCOL(X); npar = p+2; IF (RESPFN = 0) THEN RESPNAME = 'Probit'; ELSE RESPNAME = 'Logistic'; PRINT 'RANDOM-INTERCEPT BINARY REGRESSION ANALYSIS'; PRINT DEPNAME; PRINT 'Response Function'; PRINT RESPNAME; /*** FIGURE OUT VECTOR OF SAMPLE SIZES NI ***/ NTOT = NROW(ID); N = 1; COUNT = 1; IDOLD = 0; NITEMP = J(NTOT,1,0); DO I = 1 TO NTOT; IDTEMP = ID[I]; IF (IDOLD = 0) THEN IDOLD = IDTEMP; ELSE IF (I = NTOT) THEN DO; IF (IDTEMP = IDOLD) THEN NITEMP[N] = COUNT + 1; ELSE DO; NITEMP[N] = COUNT; N = N + 1; NITEMP[N] = 1; END; END; ELSE IF (IDTEMP = IDOLD) THEN COUNT = COUNT + 1; ELSE DO; NITEMP[N] = COUNT; N = N + 1; COUNT = 1; IDOLD = IDTEMP; END; END; NI = NITEMP[1:N,1]; PRINT 'NUMBER OF CLUSTERS'; PRINT N; PRINT 'TOTAL NUMBER OF OBSERVATIONS'; PRINT NTOT; PRINT 'CLUSTER SAMPLE SIZES'; PRINT ((NI)`); /*** parameter starting values ***/ MEANY = SUM(Y) / NTOT; MEANX = X[+,] / NTOT; STDY = SQRT(SSQ(Y - MEANY) / (NTOT-1)); XDEV = J(NTOT,P,0); DO L = 1 TO P; XDEV[,L] = X[,L] - MEANX[L]; END; BETA = (INV(XDEV` * XDEV) * XDEV` * (Y - MEANY[1])) / STDY[1]; XB = MEANX * BETA; FREQY = J(2,1,0); CUMSUM= 0; DO I = 1 TO NTOT; DO J = 1 TO 2; IF (Y[I] = CODE[J]) THEN DO; FREQY[J] = FREQY[J] + 1; goto find1; END; END; find1: END; CUMSUM = CUMSUM + FREQY[1]; CUMODDS= CUMSUM / (NTOT - CUMSUM); LNCUMOD= LOG(CUMODDS); IF (RESPFN = 0) THEN TEMPPAR = 0.625 # (LNCUMOD + XB); ELSE TEMPPAR = LNCUMOD + XB; MU = 0 - TEMPPAR; SIGMA = 0.31623; /*** number of quadrature points, quadrature nodes & weights ***/ nq = 10; bq = { -4.85946282833231, -3.58182348355193, -2.48432584163895, -1.46598909439116, -0.48493570751550, 0.48493570751550, 1.46598909439116, 2.48432584163895, 3.58182348355193, 4.85946282833231}; aq = { 0.00000431065265, 0.00075807095698, 0.01911158107317, 0.13548370704150, 0.34464234526294, 0.34464234526294, 0.13548370704150, 0.01911158107317, 0.00075807095698, 0.00000431065265}; /*** start iterations ***/ IT = 0; MAXCORR=1; DO UNTIL (MAXCORR < CONVERGE); IT = IT + 1; PRINT '*********'; PRINT 'ITERATION'; PRINT IT ; PRINT '*********'; logl= 0; /*** calculate the derivatives and information matrix ***/ nn = 0; der2 = J(npar,npar,0); der = J(npar,1,0); DO i=1 TO n; LIK = J(NQ,1,0); h = 0; if (i > 1) then nn = nn + ni[i-1]; derp = J(npar,1,0); DO q=1 TO nq; psum = 0; derq = J(npar,1,0); DO k=1 TO ni[i]; nob = nn+k; XB = 0; DO l=1 TO p; XB = XB + beta[l] # X[nob,l]; END; z = mu+XB+sigma#bq[q]; if (y[nob] = code[2]) then do; IF (RESPFN = 0) then do; prob = probnorm(z); probp = EXP((0-Z)*Z/2) / 2.506628275; END; IF (RESPFN = 1) then do; prob = 1 / (1 + EXP(0-z)); probp = EXP(0-Z) / ((1 + EXP(0-z))**2); END; END; if (y[nob] = code[1]) then do; IF (RESPFN = 0) then do; prob = 1 - probnorm(z); probp = 0 - EXP((0-Z)*Z/2) / 2.506628275; END; IF (RESPFN = 1) then do; prob = 1 - (1 / (1 + EXP(0-z))); probp = 0 - EXP(0-Z) / ((1 + EXP(0-z))**2); END; END; if (prob < .000000001) then prob = .000000001; PSUM = PSUM + LOG(PROB); deriv = probp/prob; derq[1] = derq[1] + deriv; do L=2 TO p+1; derq[L] = derq[L] + deriv # X[nob,L-1]; END; derq[p+2] = derq[p+2] + deriv # bq[q]; END; lik[q] = exp(psum); h = h + lik[q] # aq[q]; derp[1] = derp[1] + (derq[1] # lik[q] # aq[q]); DO L=2 TO p+1; derp[L] = derp[L] + (derq[L] # lik[q] # aq[q]); END; derp[p+2] = derp[p+2] + (derq[p+2] # lik[q] # aq[q]); END; IF (H > 0) then logl = logl + log(h); DO L=1 TO npar; der[L] = der[L] + derp[L]/h; END; scal = EXP(0 - 2*log(h)); DER2 = DER2 + scal # (DERP * DERP`); END; print 'log likelihood =' LOGL; infmat = inv(der2); corec = infmat*der; PRINT 'corrections =' (COREC`) [format=f12.6]; maxcorr = max(abs(corec)); mu = mu + corec[1]; PRINT 'mu =' MU [format=f12.6]; DO L=1 TO p; beta[L]=beta[L]+corec[L+1]; END; print 'beta =' (BETA`) [format=f12.6]; sigma = sigma + corec[p+2]; print 'sigma =' (SIGMA`) [format=f12.6]; END; /* COMPUTE STANDARD ERRORS, Z STATISTICS, AND P-VALUES FOR ALL PARAMETERS */ /* AND PUT THESE VECTORS AS COLUMN VECTORS IN BETARES & VARRES */ /* NOTE THAT THE P-VALUES ARE TWO-TAILED FOR THE REGRESSION COEFFICIENTS */ /* AND ONE-TAILED FOR THE VARIANCE TERM */ SE = sqrt(vecdiag(inv(der2))); MURES = J(1,4,0); MURES[1,1] = MU; MURES[1,2] = SE[1,1]; MURES[1,3] = MURES[1,1] / MURES[1,2]; MURES[1,4] = 2 * probNORM(0 - ABS(MURES[1,3])); P1 = P+1; BETARES = J(P,4,0); BETARES[1:P,1] = BETA; BETARES[1:P,2] = SE[2:P1,1]; BETARES[1:P,3] = BETARES[1:P,1] / BETARES[1:P,2]; BETARES[1:P,4] = 2 * probNORM(0 - ABS(BETARES[1:P,3])); P2 = P+2; VARRES = J(1,4,0); VARRES[1,1] = SIGMA; VARRES[1,2] = SE[P2,1]; VARRES[1,3] = VARRES[1,1] / VARRES[1,2]; VARRES[1,4] = 2 * PROBNORM(0 - ABS(VARRES[1,3])); SIGMA2 = SIGMA*SIGMA; IF (RESPFN = 0) then ERVAR = 1; Else do; PI = 3.141592654; ERVAR = PI*PI/3; END; INTCLASS = SIGMA2 / (SIGMA2 + ERVAR); /*** PRINT OUT THE RESULTS ***/ PRINT ' '; PRINT ' '; PRINT '***************'; PRINT ' FINAL RESULTS '; PRINT '***************'; print 'Total Iterations =' it [format=3.0]; print 'Quadrature Points =' nq [format=3.0]; print 'Convergence Criterion =' converge; PRINT 'Log-Likelihood =' LOGL [format=12.6]; DEVIAN = 2*(0-LOGL); PRINT '-2 Log-Likelihood =' DEVIAN [format=12.6]; PRINT ' '; PRINT 'REGRESSION PARAMETERS'; PRINT ' ESTIMATE SE Z P-VAL(2)'; PRINT MURES [rowname=muname FORMAT=12.6]; PRINT 'COVARIATES'; PRINT BETARES [rowname=VARNAMES format=12.6]; PRINT ' '; PRINT 'STANDARD DEVIATION OF RANDOM-EFFECT DISTRIBUTION'; PRINT VARRES [rowname=VNAME format=12.6]; PRINT ' '; PRINT 'INTRACLASS CORRELATION'; PRINT INTCLASS [FORMAT=F12.6];