/*JOBPARM T=(0,40),L=8,R=2048 /*ROUTE PRINT UICVM.U37331 // EXEC FORTVCLG //FORT.SYSIN DD * CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC C C C C C CLUSPAC: Computer Programs for Mixture-Model Clustering C C C C COPYRIGHT 1991 STANLEY LOUIS SCLOVE. C C C C C CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC C C C C C C C CMS FILE NAME OF PROGRAM: MIXPCMA CLUSPAC C C VERSION 4.6 3-MAR-94 C C C C C C DEVELOPED AND PROGRAMMED BY: C C C C DR. STANLEY L. SCLOVE 312/996-2681 C C DEPARTMENT C C OF INFORMATION AND DECISION SCIENCES M/C 294 C C COLLEGE OF BUSINESS ADMINISTRATION C C UNIVERSITY OF ILLINOIS AT CHICAGO C C BOX 4348, CHICAGO, IL 60680-4348 C C C C C C COPYRIGHT 1991 STANLEY LOUIS SCLOVE. C C C C C C ISOPAC consists of the CLUSPAC clustering programs, C C the TSPAC time-series segmentation programs and the C C IMPAC image-segmentation programs. C C C C CLUSPAC is a set of programs implementing clustering C C algorithms derived under the assumption of Gaussian C C class-conditional distributions. The ISDT* programs in C C CLUSPAC are based on the so-called "classification" C C likelihood. The MIX* programs are based on the mixture- C C model likelihood. C C C C Program MIXPCMA in the CLUSPAC package is one of the C C mixture-model programs for clustering multivariate data. C C (For univariate data the "MIX1" programs may be used.) C C MIXPCM and MIXPCMA assume a common covariance matrix C C across distributions. MIXPDT and MIXPDTA allow different C C covariance matrices. C C MIXPCMA tries a range of numbers of clusters, with C C automatic setting of initial values of the parameters. C C C C C C ..............................................................C C C C C C C RESTRICTIONS (CAN BE MODIFIED): C N, SAMPLE SIZE, AT MOST 1000; C IP, NUMBER OF VARIABLES, AT MOST 20; C K, NUMBER OF CLUSTERS, AT MOST 29; C ITER, MAXIMUM NUMBER OF ITERATIONS, 20. C C SUBROUTINE(S) CALLED: C MATEQ, WHICH CALLS MATDT C C C C C ..............................................................C C C DIMENSION X(1000,20),SUM(29,20),ID(1000) DIMENSION F(1000,29),PP(29,1000),PPOLD(29,1000) DIMENSION ICLUS(1000),ICLSOL(1000) DIMENSION DENOM(1000),XMXPR(1000) DIMENSION DSQ(29) DIMENSION TITLE(18) DIMENSION NG(29),XBAR(29,20) DIMENSION DATFMT(18),INPFMT(18) DIMENSION SSD(29,20,20) DIMENSION WGSS(20,20) DIMENSION VARHAT(20,20),COVMX(20,20) DIMENSION XMIN(20),XMAX(20) DIMENSION IV(20,20) DIMENSION P(20,20) DIMENSION XJ(9,9) DIMENSION PR(29),IPARM(29),XM2LL(29) DIMENSION AICVEC(9),SCHVEC(9),SCHPPH(9),AICPPH(9),PRIRLN(9) DIMENSION TOTAL(29) DIMENSION XMEAN(29) DIMENSION SUMSQS(20,20) DIMENSION SSDEVS(20,20) C CHARACTER*6 ID C DOUBLE PRECISION SUM DOUBLE PRECISION WGSS,SSD DOUBLE PRECISION PI DOUBLE PRECISION VARHAT,COVMX DOUBLE PRECISION SSDEVS,SUMSQS,TOTAL DOUBLE PRECISION SUMLNF,SUMPXF DOUBLE PRECISION PR,DENOM,PP,AICPPH,SCHPPH,PRIRLN,PPOLD DOUBLE PRECISION P DOUBLE PRECISION DET,ZSQ,TRUDET DOUBLE PRECISION DSQ DOUBLE PRECISION XBAR DOUBLE PRECISION DEVV,DEVW DOUBLE PRECISION F C C C C C IV IS A WORK ARRAY FOR SUBROUTINE MATEQ. C C C C CONTROL CARDS: C C DATASET TITLE C N, IN FORMAT (2X,I4) C IP, IN FORMAT (3X,I2) C INPFMT, IN FORMAT (18A4), E.G., (4F4.1,1X,A6) C DATFMT, IN FORMAT (18A4), E.G., (4F4.1) C "DATFMT" WILL ALSO BE USED FOR OUTPUT, SO ALLOW AT LEAST ONE C BLANK AT THE BEGINNING FOR CARRIAGE CONTROL. C DATA, ONE CASE AT A TIME, IN FORMAT SPECIFIED BY DATFMT C C READ (5,15000) TITLE C C WRITE PROGRAM INFORMATION. WRITE (6,10050) WRITE (6,10000) WRITE (6,10050) WRITE (6,10999) WRITE (6,11000) TITLE C THE FOLLOWING ARE IMSL DATE AND TIME FUNCTIONS: CALL TDATE(IDAY,MONTH,IYEAR) C CALL DTIME(IHOUR,MINUTE,ISEC) C WRITE (6,11100) MONTH,IDAY,IYEAR,IHOUR,MINUTE,ISEC C C READ SAMPLE SIZE, N. READ (5,12000) N C READ NUMBER OF VARIABLES, IP. READ (5,17000) IP WRITE (6,19000) IP WRITE (6,13000) N C C READ DATA FORMAT. READ (5,15000) INPFMT READ (5,15000) DATFMT C C READ DATA. DO 500 I = 1,N READ (5,INPFMT) (X(I,JV), JV = 1,IP),ID(I) IF (I .EQ. 1) GO TO 100 GO TO 300 100 CONTINUE DO 200 JV = 1,IP XMAX(JV) = X(1,JV) XMIN(JV) = X(1,JV) 200 CONTINUE 300 CONTINUE DO 400 JV = 1,IP IF (X(I,JV) .LT. XMIN(JV)) XMIN(JV)=X(I,JV) IF (X(I,JV) .GT. XMAX(JV)) XMAX(JV)=X(I,JV) 400 CONTINUE 500 CONTINUE WRITE (6,16000) WRITE (6,DATFMT) (XMIN(JV),JV=1,IP) WRITE (6,15500) WRITE (6,DATFMT) (XMAX(JV),JV=1,IP) C WRITE (6,11111) WRITE (6,11112) DO 111 I=1,4 WRITE (6,INPFMT) (X(I,JV),JV=1,IP),ID(I) 111 CONTINUE KUP = MIN(9,N) WRITE (6,11096) KUP 11096 FORMAT(/' NUMBER OF CLUSTERS TO REPORT. . . . . . . . 1 TO', XI2/) C C SET CONSTANTS. XN = N PI = .314159265358979324D+01 C C SET INITIAL VALUES OF MIXING PROBABILITIES:-- C C SET CONSTANTS. C XJ(IC,K), K=1 TO 9, IC = 1 TO K C OPTIMAL CLASS PROBABILITIES - NORMAL DISTRIBUTION C JOHARI & SCLOVE (1975). "PARTITIONING A DISTRIBUTION." C COMMUNICATIONS IN STATISTICS. XJ(1,2)=.5 XJ(2,2)=.5 XJ(1,3)=.2703 XJ(2,3)=.4594 XJ(3,3)=.2703 XJ(1,4)=.1631 XJ(2,4)=.3369 XJ(3,4)=.3369 XJ(4,4)=.1631 XJ(1,5)=.1068 XJ(2,5)=.2444 XJ(3,5)=.2976 XJ(4,5)=.2444 XJ(5,5)=.1068 XJ(1,6)=.0739 XJ(2,6)=.1810 XJ(3,6)=.2451 XJ(4,6)=.2451 XJ(5,6)=.1810 XJ(6,6)=.0739 XJ(1,7)=.0536 XJ(2,7)=.1375 XJ(3,7)=.1986 XJ(4,7)=.2106 XJ(5,7)=.1986 XJ(6,7)=.1375 XJ(7,7)=.0536 XJ(1,8)=.0402 XJ(2,8)=.1067 XJ(3,8)=.1613 XJ(4,8)=.1918 XJ(5,8)=.1918 XJ(6,8)=.1613 XJ(7,8)=.1067 XJ(8,8)=.0402 XJ(1,9)=.0310 XJ(2,9)=.0845 XJ(3,9)=.1324 XJ(4,9)=.1643 XJ(5,9)=.1756 XJ(6,9)=.1643 XJ(7,9)=.1324 XJ(8,9)=.0845 XJ(9,9)=.0310 C C EVALUATE MODEL-SELECTION CRITERIA FOR K=1 (NO CLUSTERING): C C COMPUTE SUM VECTOR AND SUM-OF-PRODUCTS MATRIX: DO 105 JV = 1,IP TOTAL(JV) = 0.0D+00 DO 105 JW = 1,IP SUMSQS(JV,JW) = 0.0D+00 105 CONTINUE DO 110 JV = 1,IP DO 110 I = 1,N TOTAL(JV) = TOTAL(JV) + X(I,JV) 110 CONTINUE DO 120 JV = 1,IP DO 120 JW = 1,IP DO 120 I = 1,N SUMSQS(JV,JW) = SUMSQS(JV,JW) + X(I,JV)*X(I,JW) 120 CONTINUE DO 130 JV = 1,IP XMEAN(JV) = TOTAL(JV)/XN 130 CONTINUE DO 140 JV = 1,IP DO 140 JW = 1,IP SSDEVS(JV,JW) = SUMSQS(JV,JW)- TOTAL(JV)*TOTAL(JW)/XN VARHAT(JV,JW) = SSDEVS(JV,JW)/XN 140 CONTINUE C WRITE SUMMARY STATISTICS FOR WHOLE SAMPLE: WRITE (6,10050) WRITE(6,21000) WRITE(6,22000) WRITE(6,10100) ( XMEAN(JV), JV = 1,IP ) WRITE(6,10200) DO 150 JV = 1,IP WRITE(6,48000) ( VARHAT(JV,JW), JW=1,IP ) 150 CONTINUE C C C CALL SUBROUTINE TO COMPUTE DETERMINANT OF SAMPLE COVARIANCE C MATRIX: C IDET = 1 NRS1 = 0 DO 2404 JV = 1,IP DO 2404 JW = 1,IP COVMX(JV,JW) = VARHAT(JV,JW) 2404 CONTINUE CALL MATEQ(COVMX, IP,20,JFLG,DET,IDET,IV,NRS1,P,20) C C GENERAL FORM OF CALL IS: C CALL MATEQ(A,M,N,JFLG,DET,IDET,IV,NRS1,P,LL) C SEE SUBROUTINE LISTING FOR FULLER EXPLANATION. C DET(VARHAT) = DET*10.0**IDET C IF (JFLG .GT. 0) WRITE (6,60000) JFLG C C XIDET = IDET C XLGDET = DLOG(DET) + XIDET*ALOG(10.0) TRUDET = DET*(10.0**IDET) IF (JFLG .GT. 0) WRITE (6,62000) DET,IDET C C MAX LIKELIHOOD = C (2*PI)**(-N*P/2)*(DET OF COV MX)**(-N/2)*EXP(-N*P/2) C COMPUTE LOG OF MAX LIKELIHOOD: XLL=-(XN*IP/2.0)*DLOG(2.0*PI)-(XN/2.0)*DLOG(TRUDET)-XN*IP/2.0 XMN2LL = (-2.0)*XLL XM2LL(1) = XMN2LL WRITE(6,37000) XMN2LL C NO. OF PARAMETERS FOR UNCLUSTERED SAMPLE IS: C 1 MEAN VECTOR + 1 COV MX. NOPARM = IP + IP*(IP+1)/2 IPARM(1) = NOPARM C AIC = XMN2LL + 2.0*NOPARM WRITE(6,70000) AIC SCH = XMN2LL + ALOG(XN)*NOPARM WRITE (6,71000) SCH AICVEC(1) = AIC SCHVEC(1) = SCH WRITE (6,10050) C C C PERFORM CLUSTERING FOR VARIOUS NUMBERS OF CLUSTERS, K: C DO 850 K = 2,KUP WRITE (6,26000) K C C C SET CONSTANTS FOR THIS VALUE OF K. C PARAMETERS FOR MODEL WITH COMMON COVARIANCE MATRIX ARE C K MEAN VECTORS AND ONE COVARIANCE MATRIX AND K-1 INDEPENDENT C MIXING PROBABILITIES. C NOPARM = K*IP + IP*(IP+1)/2 + (K-1) C C WRITE (6,72000) NOPARM C SET INITIAL VALUES OF PARAMETER ESTIMATES FOR THIS VALUE OF K: C C SET INITIAL VALUES OF PRIOR PROBABILITIES EQUAL TO THE C OPTIMAL VALUES FOR A NORMAL DISTRIBUTION: C DO 360 IC = 1,K PR(IC) = XJ(IC,K) 360 CONTINUE C C TO SET INITIAL VALUES OF PRIOR PROBABILITIES EQUAL TO C BINOMIAL(X=IC-1;N=K-1,P=1/2). C FOR IC = 1,2,...,K, C REMOVE THE "C" FROM CC1 OF THE NEXT LINE: C PR(IC) = GAMMA(XK)/(GAMMA(XC)*GAMMA(XK-XC+1)*(2**K)) C C FOR EQUAL INITIAL VALUES OF PRIOR PROBABILITIES, C REMOVE THE "C" FROM CC1 OF THE NEXT LINE: C PR(IC) = 1.0/K . C C INITIALIZE POSTERIOR PROBABILITIES: DO 549 I = 1,N DO 549 IC = 1,K PPOLD(IC,I) = 1.0/K 549 CONTINUE C WRITE (6,20000) C C SET INITIAL MEANS EQUAL TO K OF THE OBSERVATIONS C The rationale for this method assumes that the data have been C sorted according to some potentially important variable. DO 550 JV = 1,IP XBAR(1,JV) = X(1,JV) 550 CONTINUE DO 555 IG = 2,K I = INT( (IG-1)*N/(K-1) ) DO 555 JV = 1,IP XBAR(IG,JV) = X(I,JV) 555 CONTINUE C C ALTERNATIVELY, HERE IS ANOTHER SCHEME FOR COMPUTING INITIAL C MEANS (in two dimensions this scheme results in means oriented C from southwest to northeast): C DO 601 JV = 1,IP C DO 601 IG = 1,K C W = IG/(K+1.0) C XBAR(IG,JV) = (1-W)*XMIN(JV) + W*XMAX(JV) C 601 CONTINUE C C DO 600 IG=1,K WRITE (6,DATFMT) ( XBAR(IG,JV), JV=1,IP) 600 CONTINUE C C SET INITIAL COVARIANCE MATRIX: C (HERE SOME SLIGHT POSITIVE CORRELATION IN PROGRAMMED IN. C THIS COULD BE CHANGED.) DO 610 JV=1,IP DO 610 JW=1,IP VARHAT(JV,JW)=0.40 610 CONTINUE DO 620 JV = 1,IP VARHAT(JV,JV) = 1.0 620 CONTINUE C C COMPUTE INITIAL INVERSE COVARIANCE MATRIX: C IDET = 1 NRS1 = 0 DO 2406 JV = 1,IP DO 2406 JW = 1,IP COVMX(JV,JW) = VARHAT(JV,JW) 2406 CONTINUE CALL MATEQ(COVMX, IP,20,JFLG,DET,IDET,IV,NRS1,P,20) C C GENERAL FORM OF CALL IS: C CALL MATEQ(A,M,N,JFLG,DET,IDET,IV,NRS1,P,LL) C SEE SUBROUTINE LISTING FOR FULLER EXPLANATION. C DET(VARHAT) = DET*10.0**IDET C IF (JFLG .GT. 0) WRITE (6,60000) JFLG C C XIDET = IDET C XLGDET = DLOG(DET) + XIDET*ALOG(10.0) TRUDET = DET*(10.0**IDET) C C ITER = 1 700 CONTINUE WRITE(6,32000) ITER IF (ITER .EQ. 1) GO TO 910 DO 800 I = 1,N ICLSOL(I) = ICLUS(I) 800 CONTINUE C 910 CONTINUE C C COMMENCE COMPUTATION OF F(I,IC), I=1,...,N, IC=1,...,K: C C COMPUTE MAHALANOBIS D-SQUARE BETWEEN THE I-TH OBSERVATION AND C THE L-TH MEAN, L=1,2,...,K, I=1,2,...,N:-- C DO 1200 I = 1,N C DO 1000 L=1,K DSQ(L) = 0.0D+00 DO 1310 JV=1,IP DEVV = XBAR(L,JV) - X(I,JV) DO 1310 JW=1,IP DEVW = XBAR(L,JW) - X(I,JW) DSQ(L) = DSQ(L) + DEVV*P(JV,JW)*DEVW 1310 CONTINUE ZSQ = DSQ(L) C IF D-SQ IS INORDINATELY LARGE, SET VALUE OF PDF TO ZERO C (IT IS EXTREMELY SMALL ANYWAY, AND THIS AVOIDS UNDERFLOW): IF ( ZSQ/2.0 .LE. 174.673 ) GO TO 1090 F(I, L) = 0.0D+00 GO TO 1100 C 1090 CONTINUE F(I, L) =DEXP(-ZSQ/2.0)/((2.0*PI)**(IP/2.0)*DSQRT(TRUDET)) 1100 CONTINUE 1000 CONTINUE 1200 CONTINUE C C COMPUTE LOG LIKELIHOOD AND VALUES OF MODEL-SELECTION CRITERIA: C SUMLNF = 0.0D+00 DO 2200 I=1,N SUMPXF = 0.0D+00 DO 2100 IC=1,K SUMPXF = SUMPXF + PR(IC)*F(I,IC) 2100 CONTINUE C IF ( SUMPXF .EQ. 0.0 ) GO TO 2200 SUMLNF = SUMLNF + DLOG(SUMPXF) 2200 CONTINUE C XMN2LL = -2.0*SUMLNF XM2LL(K) = XMN2LL C WRITE (6,69000) XMN2LL C C C COMPUTE MODEL SELECTION CRITERIA C C PARAMETERS: C K MEAN VECTORS OF DIMENSION P AND A P-BY-P COVARIANCE MATRIX, C WHERE P IS THE NUMBER OF VARIABLES, AND K-1 INDEPENDENT C MIXING PROBABILITIES. C NOPARM = K*IP + IP*(IP+1)/2 + (K-1) IPARM(K) = NOPARM C AIC = XMN2LL + 2.0*NOPARM SCH = XMN2LL + ALOG(XN)*NOPARM WRITE (6,70000) AIC WRITE (6,71000) SCH AICVEC(K) = AIC SCHVEC(K) = SCH C C C COMPUTE POSTERIOR PROBABILITIES OF GROUP MEMBERSHIP, C PP(IC,I), THE CONDITIONAL PROBABILITY OF POP'N IC, GIVEN X(I), C AS PR(IC)*F(I,IC)/DENOM(I): DO 1350 I = 1,N DENOM(I) = 0.0D+00 DO 1350 IC=1,K DENOM(I) = DENOM(I) + PR(IC)*F(I,IC) 1350 CONTINUE C DO 1400 I = 1,N IF ( DENOM(I) .GT. 0.0D+00 ) GO TO 1410 C IF DENOM(I)=0, PP(IC,I) IS SET EQUAL TO OLD PP(IC,I) SO THAT C THE I-TH CASE WILL STILL HAVE WEIGHT IN SUCCEEDING C COMPUTATIONS: C WRITE (6,14560) ID(I) 14560 FORMAT(1X,'THE DENOMINATOR OF THE POSTERIOR PROBS FOR '/ X1X,A6, ' IS 0, SO THE POSTERIOR PROBS PP(IC,I), IC=1,...,K, '/ X' HAVE BEEN SET EQUAL TO THOSE FROM THE PRECEDING ITERATION '/ X' SO THAT THE I-TH CASE WILL STILL ', X'HAVE WEIGHT IN SUCCEEDING COMPUTATIONS.') C DO 1456 IC = 1,K PP(IC,I) = PPOLD(IC,I) 1456 CONTINUE GO TO 1401 C 1410 CONTINUE DO 1402 IC=1,K PP(IC,I)= PR(IC)*F(I,IC)/DENOM(I) 1402 CONTINUE C 1401 CONTINUE 1400 CONTINUE C IF ( N-15 ) 1421,1421,1422 1422 CONTINUE WRITE(6,76000) DO 1420 I = 1,4 WRITE(6,96000) ( PP(IC,I), IC = 1,K ) 1420 CONTINUE GO TO 1423 1421 CONTINUE DO 1424 I = 1,N WRITE (6,25000) I, (PP(IC,I),IC=1,K) 1424 CONTINUE 1423 CONTINUE C C C UPDATE PARAMETER ESTIMATES: C C C UPDATE CLUSTER PRIOR PROBABILITIES PR(IC):-- DO 3800 IC = 1,K PR(IC) = 0.0D+00 DO 3800 I = 1,N PR(IC) = PR(IC) + PP(IC,I) 3800 CONTINUE DO 3900 IC = 1,K PR(IC) = PR(IC)/N 3900 CONTINUE DO 1750 IG = 1,K DO 1750 JV = 1,IP SUM(IG,JV) = 0.0D+00 DO 1750 JW = 1,IP SSD(IG,JV,JW) = 0.0D+00 1750 CONTINUE C C UPDATE MEANS:-- DO 1875 IC = 1,K DO 1875 JV = 1,IP DO 1875 I = 1,N SUM(IC,JV) = SUM(IC,JV) + PP(IC,I)*X(I,JV) 1875 CONTINUE DO 1900 IG = 1,K IF (N*PR(IG) .LT. 0.5) GO TO 2050 GO TO 2150 2050 WRITE (6,74000) IG WRITE (6,10050) C IF INSUFFICIENT WEIGHT IN ANY CLUSTER, GO ON TO NEXT K: GO TO 850 2150 CONTINUE DO 1900 JV = 1,IP XBAR(IG,JV) = SUM(IG,JV)/(N*PR(IG)) 1900 CONTINUE C C C CALL XUFLOW(0) DO 3601 IC=1,K DO 3601 JV=1,IP DO 3601 JW=1,IP DO 3601 I=1,N DEVV=X(I,JV)-XBAR(IC,JV) DEVW=X(I,JW)-XBAR(IC,JW) IF ( PP(IC,I) .EQ. 0.0D+00 ) GO TO 3601 IF ( DEVV .EQ. 0.0D+00 ) GO TO 3601 IF ( DEVW .EQ. 0.0D+00 ) GO TO 3601 SSD(IC,JV,JW)=SSD(IC,JV,JW) + PP(IC,I)*DEVV*DEVW 3601 CONTINUE DO 3600 IC=1,K DO 3600 JV=1,IP DO 3600 I=1,N DEVV=X(I,JV)-XBAR(IC,JV) IF ( PP(IC,I) .EQ. 0.0D+00 ) GO TO 3600 IF ( DEVV .EQ. 0.0D+00 ) GO TO 3600 SSD(IC,JV,JV)=SSD(IC,JV,JV) + PP(IC,I)*(DEVV**2) 3600 CONTINUE C C C C C POOL: C DO 1950 JV = 1,IP DO 1950 JW = 1,IP WGSS(JV,JW) = 0.0D+00 1950 CONTINUE C DO 2300 JV = 1,IP DO 2300 JW = 1,IP DO 2300 IC = 1,K WGSS(JV,JW) = WGSS(JV,JW) + PR(IC)*SSD(IC,JV,JW) 2300 CONTINUE C COMPUTE VARHAT, MLE OF COMMON COVARIANCE MATRIX: DO 2400 JV = 1,IP DO 2400 JW = 1,IP VARHAT(JV,JW) = WGSS(JV,JW)/XN 2400 CONTINUE C C UPDATE PRECISION MATRIX AND DETERMINANT OF COVARIANCE MATRIX:-- C C C COPY VARHAT INTO COVMX TO BE USED AS MATRIX INPUT TO MATEQ C BECAUSE MATRIX INPUT IS DESTROYED BY MATEQ AND VARHAT WILL C BE NEEDED LATER: DO 2402 JV = 1,IP DO 2402 JW = 1,IP COVMX(JV,JW) = VARHAT(JV,JW) 2402 CONTINUE NRS1 = 0 IDET = 1 CALL MATEQ(COVMX, IP,20,JFLG,DET,IDET,IV,NRS1,P,20) C IF (JFLG .GT. 0) WRITE (6,60000) JFLG IF (JFLG .GT. 0) WRITE (6,62000) DET,IDET TRUDET = DET*(10.0**IDET) C C (END PARAMETER-ESTIMATE UPDATE SEQUENCE) C C IF (ITER .EQ. 1) GO TO 900 C STORE OLD LABELS: DO 825 I = 1,N ICLSOL(I) = ICLUS(I) 825 CONTINUE C 900 CONTINUE C C COMPUTE NEW LABELS BY MAX POSTERIOR PROBABILITY: DO 1600 I = 1,N XMXPR(I) = PP(1,I) ICLUS(I) = 1 DO 1600 IC = 2,K IF ( PP(IC,I) .GT. XMXPR(I) ) GO TO 1500 GO TO 1600 1500 XMXPR(I) = PP(IC,I) ICLUS(I) = IC 1600 CONTINUE C DO 1601 I = 1,N DO 1601 IC = 1,K PPOLD(IC,I) = PP(IC,I) 1601 CONTINUE C IF (N .GE. 31) GO TO 250 WRITE (6,14000) C IF ( N .GE. 31 ) GO TO 250 WRITE (6,24000) WRITE (6,18000) (ID(I),ICLUS(I), I=1,N) 250 CONTINUE C C C C IF (ITER .EQ. 1) GO TO 3000 C IF NEW CLUSTERING IS SAME AS OLD, NO FURTHER ITERATION IS C NECESSARY AND RESULTS FOR THIS VALUE OF K WILL BE PRINTED. C OTHERWISE, FURTHER ITERATIONS WILL BE PERFORMED. DO 2900 I = 1,N IF (ICLUS(I) .EQ. ICLSOL(I)) GO TO 2900 GO TO 3000 2900 CONTINUE GO TO 3300 3000 CONTINUE ITER = ITER + 1 IF (ITER.GE.21) GO TO 3100 C COUNT NUMBERS IN CLUSTERS: DO 3311 IC = 1,K NG(IC) = 0 3311 CONTINUE DO 3321 I = 1,N IC = ICLUS(I) NG(IC) = NG(IC) + 1 3321 CONTINUE WRITE (6,34000) (NG(IC),IC=1,K) C GO TO 700 3100 WRITE (6,68000) C IF MORE THAN 20 ITERATIONS, GO ON TO NEXT K: GO TO 850 C C C 3300 CONTINUE C C C COUNT NUMBERS IN CLUSTERS: DO 3310 IC = 1,K NG(IC) = 0 3310 CONTINUE DO 3320 I = 1,N IC = ICLUS(I) NG(IC) = NG(IC) + 1 3320 CONTINUE C C C PRINT FINAL RESULTS FOR THIS VALUE OF K: C C WRITE (6,34000) (NG(IC),IC=1,K) WRITE (6,36000) (PR(IC),IC=1,K) DO 2800 IC = 1,K WRITE (6,28000) IC, (XBAR(IC,JV),JV=1,IP) 2800 CONTINUE C WRITE (6,42000) DO 2500 JV=1,IP WRITE (6,48000) (VARHAT(JV,JW),JW=1,IP) 2500 CONTINUE C IF ( N .GE. 31 ) GO TO 3650 WRITE (6,52000) DO 3500 I = 1,N WRITE (6,50000) ID(I), ICLUS(I), ( X(I,JV),JV=1,IP ) 3500 CONTINUE 3650 CONTINUE C C C C C C C C WRITE (6,58000) ITER WRITE (6,34000) (NG(IG),IG=1,K) DO 3400 JV=1,IP 3400 CONTINUE C C C C COMPUTE VALUES OF MODEL-SELECTION CRITERIA: C C WRITE (6,72000) NOPARM AIC = XMN2LL + 2.0*NOPARM SCH = XMN2LL + ALOG(XN)*NOPARM WRITE (6,71000) SCH WRITE (6,70000) AIC C C WRITE (6,44000) DO 851 IC=1,K WRITE (6,45000) IC DO 851 I = 1,N IF ( ICLUS(I) .EQ. IC ) WRITE (6,46000) ID(I) 851 CONTINUE C C C GO ON TO NEXT NUMBER OF CLUSTERS, IF NOT FINISHED. C WRITE (6,10050) 850 CONTINUE C C C COMPUTE VALUES OF MODEL-SELECTION CRITERIA AND CORRESPONDING C POSTERIOR PROBABILITIES OF THE MODELS: C C C SCHPPH(K) = PR(H(K))* C CONST*(2*PI)**(IPARM(K)/2)*EXP(-SCHVEC(K)/2)*PRIOR PDF(K); C HERE PR(H(K)) IS TAKEN TO BE UNIFORM. C AICPPH(K) = PR(H(K))* C CONST*(2*PI)**(IPARM(K)/2)*EXP(-SCHVEC(K)/2)*PRIOR PDF(K); C HERE PR(H(K)) IS TAKEN TO BE Const*EXP(-NOPARM(K)). C [SEE KASHYAP (1981).] C APPROXIMATE PRIOR PDF: PRIRLN(1) = -(1*IP/2.0)*DLOG(2.0*PI) -IP/2.0 SCHPPH(1) = (IPARM(1)/2.0)*DLOG(2.0*PI) - SCHVEC(1)/2.0 SCHPPH(1) = SCHPPH(1) + PRIRLN(1) AICPPH(1) = (IPARM(1)/2.0)*DLOG(2.0*PI) - SCHVEC(1)/2.0 AICPPH(1) = AICPPH(1) + PRIRLN(1) AICPPH(1) = AICPPH(1) - IPARM(1) C ADD CONSTANT SCHVEC(1)/2 OR AICVEC(1)/2 TO PREVENT UNDERFLOW: SCHPPH(1) = SCHPPH(1) + SCHVEC(1)/2 AICPPH(1) = AICPPH(1) + AICVEC(1)/2 DO 6110 K = 2,KUP C C APPROXIMATE PRIOR PDF: PRIRLN(K) = -(K*IP/2.0)*DLOG(2.0*PI) -K*IP/2.0 C This is the prior on the k distribution parameters. C It is suggested by a Gaussian prior with cov mx = I, C maximized. C SCHPPH(K) = (IPARM(K)/2.0)*DLOG(2.0*PI) - SCHVEC(K)/2.0 SCHPPH(K) = SCHPPH(K) + PRIRLN(K) AICPPH(K) = (IPARM(K)/2.0)*DLOG(2.0*PI) - SCHVEC(K)/2.0 AICPPH(K) = AICPPH(K) + PRIRLN(K) AICPPH(K) = AICPPH(K) - IPARM(K) C ADD CONSTANT SCHVEC(1)/2 OR AICVEC(1)/2 TO PREVENT UNDERFLOW: SCHPPH(K) = SCHPPH(K) + SCHVEC(1)/2 AICPPH(K) = AICPPH(K) + AICVEC(1)/2 6110 CONTINUE C AT THIS POINT, WE HAVE THE LOGS OF THE PPHS; WE NOW CONVERT C THEM TO THE PPHS: SCHPPH(1) = DEXP(SCHPPH(1)) AICPPH(1) = DEXP(AICPPH(1)) DO 6119 K = 2,KUP IF ( SCHPPH(K) .LE. -174.673 ) GO TO 6115 SCHPPH(K) = DEXP(SCHPPH(K)) GO TO 6119 6115 SCHPPH(K) = 0.0D+00 6119 CONTINUE DO 6121 K = 2,KUP IF ( AICPPH(K) .LE. -174.673 ) GO TO 6126 AICPPH(K) = DEXP(AICPPH(K)) GO TO 6121 6126 AICPPH(K) = 0.0D+00 6121 CONTINUE SPPHS = 0.0D+00 SPPHA = 0.0D+00 DO 6120 K = 1,KUP IF ( AICPPH(K) .GT. 0.0D+00 ) SPPHA = SPPHA + AICPPH(K) 6120 CONTINUE DO 6127 K = 1,KUP IF ( SCHPPH(K) .GT. 0.0D+00 ) SPPHS = SPPHS + SCHPPH(K) 6127 CONTINUE DO 6130 K = 1,KUP SCHPPH(K) = SCHPPH(K)/SPPHS AICPPH(K) = AICPPH(K)/SPPHA 6130 CONTINUE C C C WRITE VALUES OF MSC'S FOR VARIOUS K: C WRITE (6,11001) TITLE WRITE(6,38000) DO 605 K = 1,KUP WRITE(6,33000) K,XM2LL(K),IPARM(K),AICVEC(K),AICPPH(K), XSCHVEC(K),SCHPPH(K) 605 CONTINUE C C C C WRITE (6,10050) WRITE (6,11001) TITLE WRITE (6,11097) C C WRITE (6,86000) C WRITE (6,11100) MONTH,IDAY,IYEAR,IHOUR,MINUTE,ISEC C CALL DTIME(IHOUR,MINUTE,ISEC) CALL TDATE(IDAY,MONTH,IYEAR) C WRITE (6,89000) C WRITE (6,11100) MONTH,IDAY,IYEAR,IHOUR,MINUTE,ISEC WRITE (6,98000) STOP C C C ..............................................................C C C 10050 FORMAT(/' ................................................', X'................................'//) 11111 FORMAT(/' FIRST FOUR DATA POINTS') 11112 FORMAT(' ----------------------') 13000 FORMAT(/' NUMBER OF OBSERVATIONS (SAMPLE SIZE), N . . ',I3/) 19000 FORMAT(/' NUMBER OF VARIABLES . . . . . . . . . . . . ',I3 ) 10000 FORMAT('1','**********************************************'/// X' MIXPCMA CLUSPAC - MIXTURE-MODEL CLUSTERING OF CASES'// X' COMMON COVARIANCE MATRIX '/ X' AUTOMATIC SETTING OF INITIAL PARAMETER VALUES'// X' Copyright 1991 Stanley Louis Sclove. '/// X' Developed and programmed by: '// X19X,'Prof. Stanley L. Sclove, Ph.D. Phone (312) 996-2681'/ X19X,'Information & Decision Sciences Dept. M/C 294 '/ X19X,'College of Business Administration '/ X19X,'University of Illinois at Chicago '/ X19X,'Box 4348, Chicago, IL 60680-4348 '// X' Program Version: 4.6 3-Mar-94 '/ X' (VM/CMS) ') 10100 FORMAT(' MEAN VECTOR: ',(9F8.2/)//) 11097 FORMAT(/' MIXPCMA CLUSPAC - MIXTURE-MODEL CLUSTERING OF CASES'/) 10200 FORMAT(/' COVARIANCE MATRIX: '/) 21000 FORMAT(//' STATISTICS FOR WHOLE (UNCLUSTERED) SAMPLE: ') 22000 FORMAT(' ---------- --- ----- ------------- ------ '//) 37000 FORMAT(/' MINUS 2 LOG LIKELIHOOD =',F13.1 ) 10999 FORMAT(/' PROBLEM TITLE IS' ) 11000 FORMAT(1X,18A4/) 11001 FORMAT(/1X,18A4/) 15500 FORMAT(/1X,'MAXIMUM OF EACH VARIABLE: ',/) 16000 FORMAT(/1X,'MINIMUM OF EACH VARIABLE: ',/) 12000 FORMAT(2X,I4) 11100 FORMAT(1X,I2,'/',I2,'/',I4,3X,'AT ',I2,':',I2,':',I2/) 17000 FORMAT(3X,I2) 14000 FORMAT(//1X,'CLUSTERING:'/) 15000 FORMAT(18A4) 18000 FORMAT(1X, (7(A6,':',I2,2X)/) ) 24000 FORMAT(/,1X,'CASES AND LABELS:--'/) 20000 FORMAT(//1X,'INITIAL MEANS'/) 25000 FORMAT(1X, 'I=',I4,' PPS: ', 9F8.3) 26000 FORMAT(/,1X,'K = ',I2,' CLUSTERS'/) 28000 FORMAT(1X,'MEAN VECTOR FOR CLUSTER ',I2,': ',(8F13.5/)) 32000 FORMAT(/,1X,'ITERATION ', I2,/ ) C WRITE(6,33000) K,XM2LL(K),IPARM(K),AICVEC(K),AICPPH(K), C XSCHVEC(K),SCHPPH(K) 33000 FORMAT(5X,I1,6X,F8.1, 4X,I4,3X,F7.1,1X,F4.2,3X,F7.1,1X,F4.2) 38000 FORMAT(1X,'MODEL SELECTION CRITERIA'// X' MODEL-SELECTION CRITERIA '/ X' NUMBER MINUS 2 NUMBER (PPH=POST.PROB.OF MODEL):'/ X' OF TIMES OF AKAIKE SCHWARZ '/ X' CLUSTERS, LOG OF PARAM- ------------ ------------ '/ X' K LIKELIHOOD ETERS VALUE PPH VALUE PPH '/ X' --------- ---------- ----- ------- --- ------- ---'//) C 9 XXXX.X 143 XXXX.X .XX XXXX.X .XX C 34000 FORMAT(/,1X,'NUMBERS IN CLUSTERS:',2X,9(I5,1X)/) 36000 FORMAT(/,1X,'MIXING PROBABILITIES:',3X,9(F4.2,1X)//) 42000 FORMAT(//, 1X,'COMMON COVARIANCE MATRIX (MLE):',//) 44000 FORMAT(/' LIST OF CASES, BY CLUSTER'//) 45000 FORMAT(' CLUSTER ',I2) 46000 FORMAT('+',1X,A6) 48000 FORMAT(1X,8F13.5/) 50000 FORMAT(1X,A6,':',I2,(8F13.3/)) 52000 FORMAT(//,1X,'CASE, LABEL / DATA'//) 58000 FORMAT(/1X,'CONVERGENCE: NO CASE CHANGED CLUSTERS AFTER ', X'ITERATION ',I2,'. RESULTS ARE PRINTED BELOW.'//) 60000 FORMAT(/,1X,'JFLG = ',I2,'. IF JFLG=0, COMPUTATION OF DET', X' WENT WELL; OTHERWISE, THERE WAS TROUBLE OR MATRIX WAS ', X'ILL-CONDITIONED.'//) 62000 FORMAT(/1X,'DET = ',E13.5,' IDET = ',I3,5X, X'ACTUAL DET. = DET*10**IDET',//) 68000 FORMAT(' ALGORITHM HAS NOT CONVERGED IN 20 ITERATIONS. STOP') 69000 FORMAT(/' MINUS 2 LOG LIKELIHOOD =',F13.1 ) 70000 FORMAT( ' AIC = ', F13.1 ) 71000 FORMAT( ' SCHWARZ CRITERION = ', F13.1/) 72000 FORMAT(1X, ' NUMBER OF PARAMETERS = ',I4 ) 74000 FORMAT(1X,'NO OBSERVATIONS IN GROUP ',I3,'. END PROCESSING ', X'FOR THIS NUMBER OF CLUSTERS.'/) 76000 FORMAT(' POSTERIOR PROBS OF GROUP MEMBERSHIP FOR 1ST 4 CASES:') C6000 FORMAT(/' TIME BEGAN: ') C9000 FORMAT(/' TIME FINISHED: ') 96000 FORMAT(1X,29F5.2) 98000 FORMAT(/1X,'PROGRAM ENDED NORMALLY.') END C C C SUBROUTINE MATEQ(A,M,N,JFLG,DET,IDET,IV,NRS1,P,LL) C C SUBROUTINE MATEQ IS DMATEQ FROM THE UICC SUBROUTINE LIBRARY. C PAGE 1 C C SUBROUTINE DMATEQ C ***************** C THIS ROUTINE WILL SOLVE A REAL*8 SYSTEM OF LINEAR EQUATIONS,COMPUTE C THE DETERMINANT, WITHOUT UNDERFLOW OR OVERFLOW, OF A REAL*8 MATRIX, C AND/OR INVERT A REAL*8 MATRIX. C CALLING SEQUENCE: C CALL DMATEQ(A,N,IA,JFLG,DET,IDET,IV,NRS,P,IP) WHERE; C A (INPUT) - IS THE REAL*8 MATRIX ON WHICH THE ROUTINE IS C TO WORK. IN THE PROCESS OF COMPUTATION THE C CONTENTS OF THIS MATRIX ARE DESTROYED. C N (INPUT) - IS AN INTEGER*4 VARIABLE WHICH SPECIFIES THE C ORDER OF THE A MATRIX. C IA (INPUT) - IS AN INTEGER*4 VARIABLE WHICH SPECIFIES THE C ACTUAL ROW DIMENSION OF A AS DIMENSIONED IN C THE CALLING PROGRAM. IA MUST BE GREATER THAN C OR EQUAL TO N. C JFLG (OUTPUT) - IS AN INTEGER*4 RETURN CODE VARIABLE. UPON C RETURN FROM DMATEQ IF; C JFLG=0, ALL WENT WELL. C JFLG=1, THE A MATRIX WAS SINGULAR OR NEAR C SINGULAR AND THE COMPUTATIONS COULD NOT BE C COMPLETED. THE CONTENTS OF THE VARIABLES C A, DET, IDET AND P ARE MEANINGLESS. C DET (OUTPUT) - IS A REAL*8 VARIABLE WHICH CONTAINS THE C DETERMINANT OF A. (SEE IDET) C IDET (INPUT) - IS AN INTEGER*4 VARIABLE. ON INPUT IF; C IDET=0, NO DETERMINANT IS CALCULATED. C IDET NOT 0, THE DETERMINANT OF A IS COMPUTED. C ON OUTPUT IDET CONTAINS THE POWER OF 10 C THAT DET SHOULD BE MULTIPLIED BY TO GIVE THE C CORRECT VALUE OF THE DETERMINANT. I.E. C DET(A)=DET*10.0D0**IDET. C IF DET(A) CAN BE COMPUTED WITHOUT UNDER OR C OVERFLOW, THEN IDET=0 OTHERWISE IDET IS SET C TO THE PROPER VALUE SO THAT NO UNDER OR OVER- C FLOW WILL OCCUR IN COMPUTING DET. C IV (INPUT) - IS AN INTEGER*4 WORK ARRAY WHICH SHOULD BE C DIMENSIONED AT LEAST IV(N). C PAGE 2 C C NRS (INPUT) - IS AN INTEGER*4 VARIABLE WITH THE FOLLOWING C INTERPRETATION: C NRS>0, SOLVE A SYSTEM OF LINEAR EQUATIONS C WITH NRS RIGHT HAND SIDES. C NRS=0, INVERT THE A MATRIX. C NRS<0, ONLY COMPUTE THE DETERMINANT OF A. C IN THIS CASE IDET MUST BE DIFFERENT FROM 0. C P (INPUT) - IS A REAL*8 ARRAY WITH THE FOLLOWING INTER- C PRETATION: C IF NRS>0, THEN P CONTAINS THE NRS RIGHT HAND C SIDES STORED BY COLUMNS. IN THIS CASE P MUST C BE DIMENSIONED AT LEAST P(N,NRS). ON RETURN C THE COLUMNS OF P ARE REPLACED BY THE RESPEC- C TIVE SOLUTIONS. C IF NRS=0, THEN P MUST BE DIMENSIONED AT LEAST C P(N,N). ON RETURN P WILL CONTAIN THE INVERSE C OF A. C IF NRS<0,THEN P NEED ONLY BE A DUMMY VARIABLE C IN THIS CASE P IS NEVER ACCESSED BY DMATEQ. C IP (INPUT) - IS AN INTEGER*4 VARIABLE WHICH CONTAINS THE C ACTUAL ROW DIMENSION OF P AS DIMENSIONED IN C THE CALLING PROGRAM. IP MUST BE GREATER THAN C OR EQUAL TO N. C NOTE: IMMEDIATELY ON RETURN FROM DMATEQ THE CONDITION CODE FLAG, C JFLG, SHOULD BE INTERROGATED. IF JFLG=1, THEN THE ROUTINE C COULD NOT COMPUTE A SOLUTION. C METHOD - THE ALGORITHM USED IS GAUSSIAN ELIMINATION WITH PARTIAL C -1 C PIVOTING. IN ESSENCE THE ROUTINE GENERATES A MATRIX L SUCH C -1 C THAT L *A = U, WHERE U IS AN UPPER TRIANGULAR MATRIX. THEN IT C SOLVES THE SYSTEM A*X = P BY MEANS OF THE EQUIVALENT SYSTEM C -1 -1 C U*X = L *A*X = L *P BY BACK SUBSTITUTION. C -1 C THE L MATRIX CAN BE WRITTEN AS A PRODUCT OF THE FORM C -1 C L = L *P *....*L *P WHERE EACH P IS A PERMUTATION C N-1 N-1 1 1 K C MATRIX OBTAINED BY INTERCHANGING AT MOST TWO ROWS OF THE C IDENTITY MATRIX. ( THIS REPRESENTS THE INTERCHANGING OF TWO C ROWS). THE L MATRICES ARE ELIMINATION MATRICES WHICH ARE C K C CHOSEN TO INTRODUCE ZEROS IN THE LAST N-K ENTRIES OF THE K-TH C COLUMN OF THE MATRIX. C PAGE 3 C C -1 -1 C THE CALCULATIONS OF L *A AND L *P ARE DONE BY PERFORMING C THE PERMUTATIONS ON A AND P RESPECTIVELY. THE ACTUAL L AND P C K K C ARE NOT COMPUTED. C SUBROUTINES CALLED: DMATDT C REFERENCE: C G. W. STEWART, INTRODUCTION TO MATRIX COMPUTATIONS, C ACADEMIC PRESS, 1973. REAL*8 A(N,1),DET,P(LL,1) REAL*8 DNORM,DEN,DMULT,DSUM,DISIGN DIMENSION IV(1) NRS=NRS1 IF (NRS.EQ.0) IDET=1 DISIGN=1.0D+00 DET=0.0D+00 JFLG=0 C C JFLG IS A TROUBLE FLAG.UPON EXIT IF JFLG=0 THEN THE MATRIX WAS PROCESS C WITHOUT TROUBLE.IF JFLG=1 EITHER THE MATRIX IS SINGULAR OR TROUBLE C OCCURED.ISIGN=-ISIGN EVERY TIME A ROW IS INTERCHANGED.THIS IS USED TO C INSURE THAT THE DETERMINANT HAS THE PROPER SIGN. C M1=M-1 DO 100 I=1,M 100 IV(I)=I IF (NRS) 500,200,500 200 DO 300 I=1,M DO 300 J=1,M 300 P(I,J)=0.0D+00 DO 400 I=1,M 400 P(I,I)=1.0D+00 NRS=M C C INSTEAD OF ACTUALLY INTERCHANGING ROWS A POINTER ARRAY IS USED TO KEEP C TRACK OF THE ROW POSITIONS. C C BEGIN ELIMINATION LOOP. C 500 DO 1200 K=1,M1 ICOL=K IPCOL=K C C SEARCHING FOR LARGEST ELEMENT IN ABSOLUTE VALUE IN COLUMN K. C DNORM=A(IV(K),K) IFLG=0 KK=K+1 DO 600 J=KK,M IF (DABS(A(IV(J),K)).LE.DABS(DNORM)) GO TO 600 IFLG=1 IPCOL=IV(J) DNORM=A(IPCOL,K) 600 CONTINUE C C IF IFLG=0 NO ROW INTERCHANGE TOOK PLACE.IF IFLG=1 A ROW INTERCHANGE C TOOK PLACE AND THE POINTER ARRAY IV MUST BE UPDATED. C IF (IFLG.EQ.0) GO TO 800 ISAVE=IV(ICOL) IV(ICOL)=IPCOL ICOL1=ICOL+1 DO 700 L=ICOL1,M IF (IV(L).EQ.IPCOL) IV(L)=ISAVE 700 CONTINUE DISIGN=-DISIGN 800 IF (DNORM.EQ.0.0D+00) GO TO 1900 C C BEGIN ELIMINATION OF ROW BELOW IV(K).DEN IS THE PIVOT ELEMENT. C K1=K+1 DO 1100 IM=K1,M C C BEFORE ACTUALLY ELIMINATING WE CHECK TO SEE IF A(IV(IM),K) HAS ALREADY C BEEN ANIHALATED. C IF (A(IV(IM),K).EQ.0.0D+00) GO TO 1100 C C CACULATE ELIMINATION FACTOR. C DMULT=-A(IV(IM),K) C C WE NOW CALCULATE VALUE OF OTHER ELEMENTS IN ROW IV(IM). C DO 900 NN=K1,M 900 A(IV(IM),NN)=(DMULT*A(IV(K),NN))/DNORM+A(IV(IM),NN) IF (NRS.LE.0) GO TO 1100 DO 1000 IN=1,NRS 1000 P(IV(IM),IN)=(DMULT*P(IV(K),IN))/DNORM+P(IV(IM),IN) 1100 CONTINUE 1200 CONTINUE C C CALCULATE VALUE OF DETERMINANT. C IF (A(IV(M),M).EQ.0.0D0) GO TO 1900 DET=DISIGN IF (IDET.NE.0) CALL DMATDT(A,N,M,DET,IV,IDET) IF (DET.EQ.0.0D+00) GO TO 1900 IF (NRS.LE.0) GO TO 2000 C C WE START SOLVING RIGHT HAND SIDES.THE SOLUTION REPLACES THE RIGHT HAND C VECTOR. C 1300 N1=M-1 DO 1600 JJ=1,NRS C C BEGIN BACK SUBSTITUTION. C P(IV(M),JJ)=P(IV(M),JJ)/A(IV(M),M) DO 1500 I=1,N1 DSUM=0.0D+00 DO 1400 J=1,I 1400 DSUM=DSUM-A(IV(M-I),M-J+1)*P(IV(M-J+1),JJ) 1500 P(IV(M-I),JJ)=(P(IV(M-I),JJ)+DSUM)/A(IV(M-I),M-I) 1600 CONTINUE DO 1800 JJ=1,NRS DO 1700 IND=1,M 1700 A(IND,1)=P(IV(IND),JJ) DO 1800 IND=1,M 1800 P(IND,JJ)=A(IND,1) RETURN 1900 JFLG=1 IDET=0 2000 RETURN END SUBROUTINE MATDT(A,IA,N,DET,IV,IDET) C SUBROUTINE MATDT IS DMATDT FROM THE UICC SUBROUTINE LIBRARY. REAL*8 A(IA,1),DET,B,LOG16 INTEGER*4 IV(1),K EQUIVALENCE (B,K) NUM=16777216 LOG16=.120411998265592457D+01 IF (A(IV(N),N).EQ.0.0D+00) GO TO 300 L=0 DO 100 I=1,N B=DABS(A(IV(I),I)) K=K/NUM-64 L=L+K 100 DET=DET*(A(IV(I),I)/16.0D+00**K) B=DABS(DET) K=K/NUM-64 IW=L+K IF ((IW.LT.-64).OR.(IW.GT.63)) GO TO 200 DET=DET*16.0D+00**L IDET=0 GO TO 400 200 DET=DET*16.0D+00**(-K) IDET=L+K B=IDET*LOG16 IDET=B B=B-DFLOAT(IDET) DET=DET*1.0D+01**B GO TO 400 300 DET=0.0D+00 IDET=0 400 RETURN END //GO.SYSIN DD * SIMULATED DATA: 3 CLUSTERS AND ONE OUTLIER (CLUSSIMU DATSRTC1) N=0301 IP=02 (1X,F7.4,1X,F7.4,2X,A4) (1X,F7.4,1X,F7.4) 0.2761 5.3893 A027 0.7708 6.7202 A067 1.2135 6.5741 A030 1.2359 8.1513 A084 1.3619 7.3645 A029 1.5113 7.9240 A092 1.5371 7.4268 B100 1.6123 8.4856 A031 1.6125 6.9551 A063 1.7297 8.8081 A004 1.7434 7.7771 A038 1.8542 7.3462 A047 1.8905 7.4867 A082 1.9367 9.3444 A072 1.9555 10.3404 A006 1.9760 9.4064 A058 1.9931 8.5513 A033 2.0032 8.0778 A083 2.1018 8.0143 A037 2.1035 7.5005 A055 2.1090 9.1833 A002 2.1223 7.0919 A014 2.1247 8.7759 A041 2.1472 8.3165 A090 2.1691 7.3328 A093 2.1793 8.6722 A040 2.1970 9.0271 A003 2.2275 5.9324 A073 2.2569 7.3838 A078 2.3120 7.9016 A069 2.4533 6.8759 A012 2.5045 6.5778 A053 2.5183 8.9478 A068 2.5463 8.3745 A061 2.6207 7.9916 A065 2.6262 8.7592 A007 2.6483 8.5994 A097 2.6686 9.2729 A046 2.6750 9.4615 A021 2.6989 8.8945 A001 2.7381 10.8687 A039 2.7980 7.3205 A005 2.8526 9.9255 A052 2.8540 8.5639 A020 2.9206 9.1332 A008 2.9280 8.7323 A076 2.9366 10.0333 A056 2.9382 9.7908 A015 2.9467 9.8001 A050 2.9854 8.8421 A077 2.9862 9.3963 A032 3.0199 10.9127 A025 3.0204 9.8841 A099 3.0390 9.7667 A086 3.0484 8.4229 A075 3.1554 7.8201 A054 3.1712 8.1585 A044 3.1750 9.3659 A019 3.2097 9.0448 A018 3.2212 9.3711 A074 3.2212 10.3269 A085 3.2338 9.3520 A013 3.2629 10.4462 A064 3.3036 8.6369 A059 3.3073 8.8117 A034 3.3114 9.2779 A011 3.4367 8.3833 A023 3.4429 10.6049 A062 3.4511 8.8238 A035 3.4611 8.0619 A091 3.4862 10.2716 A098 3.6738 9.4348 A057 3.7087 9.6641 A017 3.7104 8.5965 A066 3.7270 9.4421 A024 3.7319 10.2773 A079 3.7533 7.8627 A051 3.7858 9.0805 A088 3.9176 10.9779 A028 3.9452 3.1365 B156 4.0090 9.9223 A045 4.0420 8.8521 A089 4.0429 11.4351 A036 4.1099 4.3631 B116 4.1722 10.9305 A080 4.2004 10.6635 A071 4.2672 10.2335 A016 4.2782 9.9480 A081 4.3567 9.5689 A026 4.3775 10.8224 A095 4.4413 9.6839 A048 4.4421 11.2215 A042 4.4595 10.1874 A094 4.4634 10.7693 A009 4.5020 4.6428 B154 4.5119 3.6492 B160 4.5175 10.1376 A010 4.5565 4.6507 B107 4.5754 5.7088 B110 4.6335 5.4000 B106 4.6665 12.3705 A087 4.6769 4.0792 B123 4.7045 7.1875 B168 4.7792 4.9793 B144 4.8307 7.2868 B150 4.8379 5.1690 B183 4.8417 10.9276 A049 4.8422 4.8430 B113 4.8633 7.5951 B185 4.8701 2.4507 B181 4.8906 4.4778 B164 4.9559 5.3073 B158 4.9703 2.7886 B137 4.9965 6.1795 B189 5.0032 5.7263 B147 5.0470 10.6405 A060 5.1540 12.1235 A022 5.1960 5.5193 B175 5.2004 12.2151 A043 5.2018 7.1457 B125 5.2450 4.9625 B161 5.3193 6.8121 B126 5.3999 6.8334 B178 5.4791 6.1149 B128 5.5460 4.0992 B141 5.5703 6.6023 B192 5.5776 6.2882 B103 5.5992 5.3619 B195 5.6079 4.3876 B148 5.6104 4.1744 B142 5.6188 6.2134 B121 5.6230 4.3966 B188 5.6525 7.5356 B176 5.6606 12.2639 A096 5.6767 5.5578 B198 5.6811 5.8614 B124 5.6881 12.0911 A070 5.7290 5.8473 B157 5.7339 5.6523 B102 5.7457 6.5675 B166 5.7769 5.7275 B118 5.7796 5.8411 B117 5.7849 4.8565 B127 5.8301 5.6686 B101 5.8317 5.4670 B122 5.8320 4.4746 B180 5.8355 5.7646 C200 5.9421 6.7757 B199 5.9898 8.2032 B170 6.0843 7.1658 B162 6.0941 6.3332 B169 6.1266 6.1043 B172 6.1369 4.8187 B140 6.1873 6.8658 B120 6.2314 3.9732 B194 6.2345 6.1021 B165 6.2752 6.6343 B173 6.2789 6.2031 B119 6.3430 7.0022 B139 6.3504 7.0549 B151 6.3590 7.3384 B184 6.3950 7.2153 B187 6.4460 4.2089 B108 6.4658 5.9170 B171 6.4735 6.5399 B132 6.4777 6.9712 B109 6.4791 7.2657 B153 6.5480 6.8241 B114 6.5834 1.3351 C285 6.5857 7.4789 B197 6.6092 5.6457 B155 6.6110 5.8851 B152 6.6111 -0.4222 C216 6.6620 6.8291 B115 6.6678 7.3168 B111 6.6693 6.6074 B179 6.7198 6.6857 B186 6.7483 7.9680 B138 6.7557 8.1510 B105 6.7728 7.3110 B129 6.7908 4.8367 B104 6.8006 6.3869 B159 6.8017 8.1249 B143 6.8141 6.2177 B182 6.8576 5.4143 B135 6.8976 -0.1763 C239 6.9031 5.3664 B134 6.9390 10.4245 B174 6.9576 -0.7034 C236 6.9928 6.7207 B193 7.0097 4.9490 B130 7.0212 7.9016 B167 7.0718 8.0984 B191 7.0740 6.2750 B133 7.0997 1.4973 C208 7.1122 6.4706 B163 7.1525 9.1853 B145 7.1784 8.5443 B131 7.3801 7.7902 B149 7.4251 2.4715 C202 7.4972 1.6852 C229 7.5412 6.1794 B190 7.5592 1.5139 C237 7.5784 1.5276 C218 7.6019 7.1780 B112 7.6035 1.0460 C283 7.6196 1.9003 C288 7.6693 6.2175 B196 7.6968 8.1225 B146 7.7077 1.3951 C260 7.7317 8.1307 B136 7.7670 0.4703 C280 7.8332 2.6719 C271 7.8531 0.6659 C276 7.8653 3.2472 C206 7.8670 1.1965 C268 7.8858 1.4844 C247 7.9575 3.5265 C234 8.0671 1.3459 C220 8.0692 1.9501 C257 8.0850 2.3338 C265 8.0892 3.7387 C225 8.1341 0.1153 C273 8.1349 3.3525 C298 8.1455 2.0826 C279 8.1478 3.6562 C255 8.1884 2.8380 C278 8.1925 3.0875 C287 8.1962 2.9412 C212 8.1993 3.2636 C241 8.2115 1.4779 C256 8.2468 2.9424 C230 8.2716 1.9259 C248 8.2987 1.1993 C213 8.3164 2.3023 C221 8.4338 2.0903 C289 8.4634 2.8649 C297 8.5313 2.4504 C209 8.5544 2.9573 C292 8.6174 2.3449 C228 8.6974 2.0147 C235 8.7109 3.8254 C244 8.7333 2.7704 C296 8.7451 10.0418 B177 8.7677 2.5442 C252 8.8472 3.7338 C258 8.8599 2.8153 C219 8.8707 2.3505 C290 8.8717 2.1382 C224 8.8766 3.1732 C215 8.9037 4.3761 C275 8.9233 1.3367 C242 8.9274 0.8497 C293 8.9641 0.8874 C249 8.9821 3.0246 C254 9.0480 3.6574 C284 9.0510 2.8896 C226 9.0574 2.4418 C214 9.1063 1.6506 C261 9.1064 2.5126 C243 9.1231 1.8793 C210 9.1233 4.1067 C294 9.1253 2.9923 C211 9.1959 2.5771 C245 9.2189 4.4055 C205 9.2866 1.3987 C266 9.3198 3.1858 C204 9.3541 2.7547 C282 9.4596 4.1625 C250 9.6149 3.7007 C295 9.6269 2.3887 C231 9.6645 5.1878 C238 9.6655 4.7029 C227 9.6761 1.9310 C300 9.7460 3.5814 C262 9.7923 1.6110 C269 9.8255 4.1720 C263 9.8314 3.4032 C281 9.8479 1.2231 C217 9.8582 3.3583 C277 9.9400 5.2958 C253 9.9652 3.5748 C264 9.9999 4.2017 C233 10.0159 3.1275 C251 10.0349 4.5835 C201 10.0374 5.4708 C232 10.1718 4.7947 C203 10.2266 4.0297 C222 10.2517 3.9798 C272 10.3215 5.2558 C286 10.3620 6.4328 C259 10.4011 3.5808 C223 10.5548 3.3248 C246 10.7194 5.6256 C240 10.7981 4.8278 C267 10.8478 3.5456 C291 10.8690 5.7404 C207 10.9407 5.2995 C274 11.1513 4.5688 C299 11.1806 5.6310 C270 12.0000 12.0000 D301 /* FROM: CMS DSN = MCUNSTAK.DATASIMU Data for figure in "Metric Considerations ..." paper A: 100 cases from N(3,9) with variances 1 and 2 and corr .7071 B: 100 cases from N(6,6) with variances 1 and 2 and corr .7071 C: 100 cases from N(9,3) with variances 1 and 2 and corr .7071 D: 1 case = (12,12) /*