CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC C C C C C CLUSPAC: Computer Programs for Mixture-Model Clustering C C C C COPYRIGHT 1991 STANLEY LOUIS SCLOVE. C C C C C CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC C C C C C CMS DSN = ISDT1DTA ISOPAC C C C C C C PROGRAM ISDT1DTA OF ISOPAC IS FOR C C CLUSTERING UNIVARIATE DATA (CLUSTERING ON THE LINE). C C IT ALLOWS DIFFERENT VARIANCES IN CLUSTERS; THE C C OBSERVATION X IS ASSIGNED TO THAT CLUSTER FOR WHICH C C LOG VAR + (X-MEAN)**2/VAR IS MINIMAL. C C AUTOMATIC MODE: VARYING NUMBER OF CLUSTERS; INITIAL MEANS C C ARE SET AUTOMATICALLY. C C C C PROGRAMMED BY C C DR. STANLEY L. SCLOVE 312/996-2681 C C PROFESSOR C C INFORMATION & DECISION SCIENCE DEPT. M/C 294 C C COLLEGE OF BUSINESS ADMINISTRATION C C UNIVERSITY OF ILLINOIS AT CHICAGO C C BOX 4348, CHICAGO, IL 60680-4348 C C C C C C VERSION 1.2 24-OCT-89 C C C C C C RESTRICTIONS (CAN BE MODIFIED): C C N, SAMPLE SIZE, AT MOST 9999; C C K, NUMBER OF CLUSTERS, AT MOST 29; C C ITER, MAXIMUM NUMBER OF ITERATIONS, 20. C C C C C C CONTROL CARDS: C C DATASET TITLE, IN FORMAT(18A4) C C N, IN FORMAT (2X,I4) C C FMT, IN FORMAT (18A4), E.G., (F4.1) C C DATA, IN FORMAT SPECIFIED BY FMT C C C C C CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC C C DIMENSION X(9999),F(9999),ICLUS(9999),IOTA(9999) DIMENSION SUM(29) DIMENSION TITLE(18) DIMENSION A(29),B(29),C(29),D(29),NG(29),XMEAN(29) DIMENSION BDY1(29),BDY2(29),ROOT1(29),ROOT2(29) DIMENSION FMT(18) DIMENSION SS(29),SSD(29) DIMENSION SD(29) DIMENSION VAR(29) DIMENSION ICLSOL(9999) DIMENSION AICVEC(29),SCHVEC(29),XKSVEC(29) DOUBLE PRECISION SUM,SS,SSD,SD C C C READ DATASET TITLE. READ(5,1050) TITLE C READ SAMPLE SIZE, N. READ(5,1001) N XN = N C C C SET CONSTANTS: PI = 3.141593 DO 105 INTEG=1,N IOTA(INTEG) = INTEG 105 CONTINUE C C WRITE PROGRAM INFORMATION. WRITE(6,1120) WRITE(6,1060) TITLE C WRITE(6,1200) N C C C READ DATA AND COMPUTE STATISTICS FOR WHOLE SAMPLE: C SUM1 = 0.0 SS1 = 0.0 DO 100 I = 1,N READ(5,*) X(I) C SUM1 = SUM1 + X(I) SS1 = SS1 + X(I)*X(I) C IF(I .EQ. 1) GO TO 498 GO TO 499 498 XMAX = X(1) XMIN = X(1) 499 CONTINUE IF(X(I) .LT. XMIN) XMIN=X(I) IF(X(I) .GT. XMAX) XMAX=X(I) 100 CONTINUE WRITE(6,1150) XMIN,XMAX C XMEAN1 = SUM1/N SSD1 = SS1 - SUM1*SUM1/N VAR1 = SSD1/N SD1 = SQRT(VAR1) WRITE(6,3000) XMEAN1,VAR1,SD1 TEMP = 2.0*PI*SSD1/N XMN2LL = N*(1.0 + ALOG(TEMP)) NOPARM = 2 AIC = XMN2LL + 2.0*NOPARM SCH = XMN2LL + ALOG(XN)*NOPARM XKASH = SCH - ALOG(2*VAR1**3) AICVEC(1) = AIC SCHVEC(1) = SCH XKSVEC(1) = XKASH WRITE(6,1215) WRITE(6,1220) AIC,SCH,XKASH C C C PERFORM CLUSTERING FOR K = 2 TO 9 GROUPS. C DO 995 K = 2,9 WGSS = 0.0 WRITE(6,1015) K C C COMPUTE INITIAL MEANS, EQUALLY SPACED THROUGH RANGE OF DATA. DO 101 IG=1,K XMEAN(IG) = XMIN + IG*(XMAX-XMIN)/(K+1) 101 CONTINUE C C DO 110 IGRP = 1,K VAR(IGRP) = 1.0 110 CONTINUE C WRITE(6,1010) C WRITE(6,1110)( XMEAN(IG), IG=1,K ) ITER = 1 601 CONTINUE IF(ITER .EQ. 1) GO TO 560 DO 565 I = 1,N ICLSOL(I) = ICLUS(I) 565 CONTINUE C COMMENCE DISTANCE COMPUTATIONS. C "DISTANCE" HERE MEANS NORMALIZED SQUARED DISTANCE PLUS C FOR FIRST ITERATION, VARIANCES ARE SET EQUAL TO ONE C (SO "DISTANCE" IS JUST SQUARED DISTANCE FROM THE MEAN). 560 CONTINUE DO 104 I = 1,N DO 102 L = 1,K D(L) = ( XMEAN(L) - X(I) )**2/VAR(L) + ALOG(VAR(L)) 102 CONTINUE F(I) = D(1) ICLUS(I) = 1 DO 104 L = 2,K IF( D(L) .LT. F(I) ) GO TO 103 GO TO 104 103 F(I) = D(L) ICLUS(I) = L 104 CONTINUE C IF (N .GE. 31) GO TO 200 WRITE(6,1003) C WRITE(6,1004) (IOTA(I), I=1,N) WRITE(6,1005) (ICLUS(I), I=1,N) 200 CONTINUE C DO 202 IG = 1,K SUM(IG) = 0.0 SS(IG) = 0.0 C NG(IG) = 0 202 CONTINUE DO 400 I = 1,N IGROUP = ICLUS(I) SUM(IGROUP) = SUM(IGROUP) + X(I) SS(IGROUP) = SS(IGROUP) + X(I)*X(I) NG(IGROUP) = NG(IGROUP) + 1 400 CONTINUE WGSS = 0.0 DO 401 IG = 1,K IF (NG(IG) .EQ. 0) GO TO 410 GO TO 415 410 WRITE(6,3100) IG GO TO 1995 415 CONTINUE XMEAN(IG) = SUM(IG)/NG(IG) NTEMP=NG(IG) IF (NTEMP .EQ. 1) GO TO 135 SSD(IG) = SS(IG) - SUM(IG)**2/NG(IG) VAR(IG) = SSD(IG)/NG(IG) SD(IG) = SQRT(VAR(IG)) GO TO 140 135 SSD(IG)=0.0 C IF NG(IG)=1, VAR(IG) IS SET EQUAL TO POOLED VARIANCE ESTIMATE C TO GIVE COMPUTATION A WAY OF PROCEEDING: VAR(IG) = VARHAT SD(IG)=SQRT(VARHAT) 140 CONTINUE WGSS = WGSS + SSD(IG) C 401 CONTINUE C TEMP = 0.0 DO 420 IG = 1,K TEMP = TEMP + NG(IG)*( 1.0 + ALOG(2*PI*VAR(IG)) ) 420 CONTINUE XMN2LL = TEMP C WGMS = WGSS/(N-K) WRITE(6,1025) XMN2LL C WRITE POOLED STATISTICS: WRITE(6,1030) WGSS, WGMS STDERR = SQRT(WGMS) WRITE(6,1085) STDERR C KM1 = K-1 DO 500 IG=1,KM1 IGP1 = IG+1 C B(IG) IS BOUNDARY BETWEEN G-TH AND G+1-ST CLASSES. IF ( VAR(IG) .NE. VAR(IGP1) ) GO TO 495 BDY1(IG) = ( XMEAN(IG)+XMEAN(IGP1) )/2.0 BDY2(IG) = BDY1(IG) GO TO 500 495 CONTINUE A(IG)=1/VAR(IG)-1/VAR(IGP1) B(IG)=-2*(XMEAN(IG)/VAR(IG)-XMEAN(IGP1)/VAR(IGP1)) C(IG) = ALOG(VAR(IG)/VAR(IGP1)) D(IG) = B(IG)**2 - 4*A(IG)*C(IG) ROOT1(IG) = (-B(IG) - SQRT(D(IG)))/(2*A(IG)) ROOT2(IG) = (-B(IG) + SQRT(D(IG)))/(2*A(IG)) BDY1(IG) = ROOT1(IG) BDY2(IG) = ROOT2(IG) 500 CONTINUE WRITE(6,1036) ITER WRITE(6,1035) (BDY1(IG),IG=1,KM1) WRITE(6,1034) (BDY2(IG),IG=1,KM1) WRITE(6,1020) (XMEAN(IG),IG=1,K) IF(ITER .EQ. 1) GO TO 600 DO 555 I = 1,N IF(ICLUS(I) .EQ. ICLSOL(I)) GO TO 555 GO TO 600 555 CONTINUE GO TO 530 600 CONTINUE ITER = ITER + 1 IF(ITER.GE.21) GO TO 570 GO TO 601 570 WRITE(6,1160) 1995 GO TO 995 C C 530 CONTINUE C WRITE(6,1040) (SUM(IG),IG=1,K) WRITE(6,1045) (NG(IG),IG=1,K) WRITE(6,1080) (VAR(IG),IG=1,K) WRITE(6,1055) (SD(IG), IG=1,K) C VARHAT IS MLE OF VARIANCE. VARHAT = WGSS/N WRITE(6,1100) VARHAT C C WRITE(6,1095) WRITE(6,1090) (X(I),ICLUS(I), I=1,N) C C COMPUTE MODEL-SELECTION CRITERIA: C NOPARM = K + K WRITE(6,1210) NOPARM AIC = XMN2LL + 2.0*NOPARM SCH = XMN2LL + ALOG(XN)*NOPARM C XKASH = 0.0 DO 211 IGRP=1,K TERM = -ALOG(2*VAR(IGRP)**3) C C XKASH = XKASH + TERM 211 CONTINUE XKASH = XKASH + SCH AICVEC(K) = AIC SCHVEC(K) = SCH XKSVEC(K) = XKASH WRITE(6,3500) AIC WRITE(6,3600) SCH WRITE(6,3700) XKASH C C GO ON TO NEXT VALUE OF K: 995 CONTINUE C C WRITE VALUES OF MSC'S FOR VARIOUS K: WRITE(6,3400) DO 605 K = 1,9 WRITE(6,3300) K, AICVEC(K), SCHVEC(K), XKSVEC(K) 605 CONTINUE C C C 1001 FORMAT(2X,I4) 1003 FORMAT(1X,'CLUSTERING') 1004 FORMAT(1X,'POINT: '/, (1X,40I3)) 1005 FORMAT(1X,'CLUSTER: '/, (1X,40I3)) 1060 FORMAT(1X,18A4) 1010 FORMAT(//1X,'INITIAL MEANS') 1110 FORMAT(1X, 9F13.2//) 1120 FORMAT('1','****************************************', A//,1X,'PROGRAM ISDT1DTA ISOPAC'/ B,1X,'FOR CLUSTERING UNIVARIATE DATA (DATA ON THE LINE)'/ C1X,'COPYRIGHT 1991 STANLEY LOUIS SCLOVE.' D/,1X,'VERSION 1.2 24-OCT-89'//) 1015 FORMAT('1',1X,'K = ',I1,' CLUSTERS') 1020 FORMAT(1X,'MEANS: ',9F13.2) 1025 FORMAT(//1X,' MINUS 2 LOG LIKELIHOOD = ', F15.2) 1030 FORMAT(//1X,'POOLED STATISTICS'/' WGSS =',F15.2,' WGMS =',F15.2/) 1035 FORMAT(1X,'BOUNDARIES:', 7X, 9F13.2) 1034 FORMAT(1X,'BOUNDARIES:', 7X, 9F13.2) 1036 FORMAT(1X,'ITERATION ', I2) 1040 FORMAT(1X,'SUMS:',6X,9F13.2) 1045 FORMAT(1X,'NUMBERS:',3X,9(I10,3X)) 1050 FORMAT(18A4) 1055 FORMAT(1X,'STD.DEVS.: ',9F13.2) 1080 FORMAT(1X,'VARIANCES: ',9F13.2) 1085 FORMAT(1X,'STD.ERROR=SQRT(WGMS) = ',F13.6//) 1090 FORMAT( (8(F6.1,I2)/)) 1095 FORMAT(1X//,1X,'DATA AND LABELS'//) 1100 FORMAT(1X, 'M.L. ESTIMATE OF COMMON VARIANCE = ',F14.5/) C 1150 FORMAT(/1X,'MIN = ',E14.4,5X,'MAX = ',E14.4/) 1160 FORMAT(1X,'PROGRAM HAS NOT CONVERGED IN 20 ITERATIONS. STOP') 1200 FORMAT(1X,'N = ',I3/) 3500 FORMAT(1X,'AIC = ', E15.5/) 3600 FORMAT(1X,'SCHWARZ CRITERION = ', E15.5/) 3700 FORMAT(1X,'KASHYAP CRITERION = ', E15.5/) 1210 FORMAT(//1X,'NUMBER OF PARAMETERS = ',I4/) 1215 FORMAT(/1X,'VALUES OF MODEL-SELECTION CRITERIA FOR WHOLE SAMPLE:') 1220 FORMAT(/1X,'AKAIKE:',E15.5,' SCHWARZ: ',E15.5,' KASHYAP: ',E15.5/) 3000 FORMAT(//1X,'FOR WHOLE SAMPLE: MEAN IS',F15.3,' VARIANCE IS', XF15.3,' STANDARD DEVIATION IS ',F15.3/) 3100 FORMAT(1X,'NO OBSERVATIONS IN GROUP ',I3,'. STOP') 3300 FORMAT(1X,'K=',I3, 3F15.2) 3400 FORMAT(1X,'MODEL SELECTION CRITERIA'/ X' AIC SCHWARZ KASHYAP '/) STOP END