CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC C C C C C CLUSPAC: Computer Programs for Mixture-Model Clustering C C C C COPYRIGHT 1991 STANLEY LOUIS SCLOVE. C C C C C CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC C CMS DSN = ISDT1 ISOPAC C C C PROGRAM ISDT1 C FOR CLUSTERING UNIVARIATE DATA (CLUSTERING ON THE LINE). C MANUAL MODE: NUMBER OF CLUSTERS AND INITIAL MEANS ARE C INPUT. USE PROGRAM ISDT1A FOR AUTOMATIC SETTING OF C NUMBER OF CLUSTERS AND INITIAL MEANS. C C C PROGRAMMED BY C DR. STANLEY L. SCLOVE 312/996-2681 C DEPARTMENT OF C INFORMATION & DECISION SCIENCES 312/996-2676 C COLLEGE OF BUSINESS ADMINISTRATION C UNIVERSITY OF ILLINOIS AT CHICAGO C BOX 4348, CHICAGO, IL 60680-4348 C C C LAST UPDATE: VERSION 2.5 9-JUN-87 C NEXT-TO-LAST UPDATE: VERSION 2.4 1-AUG-83 C C COPYRIGHT 1991 STANLEY LOUIS SCLOVE. C C C C RESEARCH SUPPORTED IN PART BY: C ONR CONTRACT N00014-80-C-0408, TASK NR042-443 C ARO CONTRACT DAAG29-82-K-0155 C C RESTRICTIONS (CAN BE MODIFIED): C N, SAMPLE SIZE, AT MOST 9999; C K, NUMBER OF CLUSTERS, AT MOST 9; C ITER, MAXIMUM NUMBER OF ITERATIONS, 20. C C CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC C C DIMENSION X(9999),F(9999),ICLUS(9999),IOTA(9999) DIMENSION D(29),C(29),SUM(29) DIMENSION TITLE(18) DIMENSION B(29),NG(29),XMEAN(29) DIMENSION FMT(18) DIMENSION SS(29),SSD(29) DIMENSION SD(29) DIMENSION VAR(29) DIMENSION ICLSOL(9999) DOUBLE PRECISION SUM,SS C C CONTROL CARDS: C C DATASET TITLE C N, IN FORMAT (2X,I4) C FMT, IN FORMAT (18A4), E.G., (1X,F4.1) C ALLOW AT LEAST ONE BLANK IN FMT: IT WILL ALSO BE USED C FOR OUTPUT. C DATA, IN FORMAT SPECIFIED BY FMT C K, NUMBER OF CLUSTERS, IN FORMAT (2X,I1) C K INITIAL MEANS, IN FORMAT SPECIFIED BY FMT C C READ(5,1050) TITLE C C WRITE PROGRAM INFORMATION. WRITE(6,1012) WRITE(6,1013) WRITE(6,1014) C WRITE(6,1060) TITLE C C READ SAMPLE SIZE, N. READ(5,1001) N XN = N WRITE(6,1051) N C C READ DATA FORMAT. READ(5,1050) FMT C C READ DATA. TOTAL=0.0 SUMSQS=0.0 SSDEVS=0.0 C DO 100 I = 1,N READ(5,FMT) X(I) TOTAL = TOTAL + X(I) SUMSQS = SUMSQS + X(I)*X(I) IF(I .EQ. 1) GO TO 498 GO TO 499 498 XMAX = X(1) XMIN = X(1) 499 CONTINUE IF(X(I) .LT. XMIN) XMIN=X(I) IF(X(I) .GT. XMAX) XMAX=X(I) 100 CONTINUE WRITE(6,1150) XMIN,XMAX XBAR = TOTAL/XN SSDEVS = SUMSQS- TOTAL*TOTAL/XN VARHAT = SSDEVS/XN WRITE(6,1112) XBAR WRITE(6,1113) VARHAT PI = 3.1415927 TEMP = 2.0*PI*SSDEVS/N XMN2LL = N*(1.0 + ALOG(TEMP)) STDDEV = SQRT(VARHAT) WRITE(6,1121) SSDEVS, XMN2LL, STDDEV NOPARM = 1 + 1 AIC = XMN2LL + 2.0*NOPARM WRITE(6,1205) AIC SCH = XMN2LL + ALOG(XN)*NOPARM XKASH = SCH + ALOG(XN) WRITE(6,1111) XKASH C WRITE(6,1009) WRITE(6,FMT) (X(I),I=1,N) C C READ K, NUMBER OF CLUSTERS. READ(5,1000) K WRITE(6,1015) K C C READ INITIAL MEANS DO 101 IG=1,K READ(5,FMT) XMEAN(IG) 101 CONTINUE C WRITE(6,1010) C WRITE(6,1011)( XMEAN(IG), IG=1,K ) C SET CONSTANTS. DO 105 INTEG=1,N IOTA(INTEG) = INTEG 105 CONTINUE ITER = 1 601 CONTINUE IF(ITER .EQ. 1) GO TO 560 DO 565 I = 1,N ICLSOL(I) = ICLUS(I) 565 CONTINUE C COMMENCE DISTANCE COMPUTATIONS. 560 CONTINUE DO 104 I = 1,N DO 102 L = 1,K D(L) = ( XMEAN(L) - X(I) )**2 102 CONTINUE F(I) = D(1) ICLUS(I) = 1 DO 104 L = 2,K IF( D(L) .LT. F(I) ) GO TO 103 GO TO 104 103 F(I) = D(L) ICLUS(I) = L 104 CONTINUE WRITE(6,1003) C WRITE(6,1004) (IOTA(I), I=1,N) WRITE(6,1005) (ICLUS(I), I=1,N) DO 202 IG = 1,K SUM(IG) = 0.0 SS(IG) = 0.0 C NG(IG) = 0 202 CONTINUE DO 400 I = 1,N IGROUP = ICLUS(I) SUM(IGROUP) = SUM(IGROUP) + X(I) SS(IGROUP) = SS(IGROUP) + X(I)*X(I) NG(IGROUP) = NG(IGROUP) + 1 400 CONTINUE WGSS = 0.0 DO 401 IG = 1,K IF (NG(IG) .EQ. 0) GO TO 410 GO TO 415 410 WRITE(6,1235) IG GO TO 1995 415 CONTINUE XMEAN(IG) = SUM(IG)/NG(IG) NTEMP=NG(IG) IF (NTEMP .EQ. 1) GO TO 135 SSD(IG) = SS(IG) - SUM(IG)**2/NG(IG) VAR(IG) = SSD(IG)/(NG(IG)-1) SD(IG) = SQRT(VAR(IG)) GO TO 140 135 SSD(IG)=0.0 VAR(IG) = 0.0 SD(IG)=0.0 140 CONTINUE WGSS = WGSS + SSD(IG) C 401 CONTINUE C TEMP = 2.0*PI*WGSS/N XMN2LL = N*(1.0 + ALOG(TEMP)) C WGMS = WGSS/(N-K) WRITE(6,1021) WGSS, XMN2LL, WGMS STDERR = SQRT(WGMS) WRITE(6,1085) STDERR C KM1 = K-1 DO 500 IG=1,KM1 IGP1 = IG+1 C B(IG) IS BOUNDARY BETWEEN G-TH AND G+1-ST CLASSES. B(IG) = ( XMEAN(IG) + XMEAN(IGP1) )/2.0 500 CONTINUE WRITE(6,1036) ITER WRITE(6,1035) (B(IG),IG=1,KM1) WRITE(6,1020) (XMEAN(IG),IG=1,K) IF(ITER .EQ. 1) GO TO 600 DO 555 I = 1,N IF(ICLUS(I) .EQ. ICLSOL(I)) GO TO 555 GO TO 600 555 CONTINUE GO TO 530 600 CONTINUE ITER = ITER + 1 IF(ITER.GE.21) GO TO 570 GO TO 601 570 WRITE(6,1160) 1995 STOP C C 530 CONTINUE C WRITE(6,1040) (SUM(IG),IG=1,K) WRITE(6,1045) (NG(IG),IG=1,K) WRITE(6,1080) (VAR(IG),IG=1,K) WRITE(6,1055) (SD(IG), IG=1,K) C VARHAT IS MLE OF VARIANCE. VARHAT = WGSS/N WRITE(6,1100) VARHAT C C WRITE(6,1095) WRITE(6,1090) (X(I),ICLUS(I), I=1,N) C C COMPUTE MODEL-SELECTION CRITERIA: NOPARM = K + 1 WRITE(6,1210) NOPARM AIC = XMN2LL + 2.0*NOPARM SCH = XMN2LL + ALOG(XN)*NOPARM C XKASH = 0.0 DO 111 IGRP=1,K GRPN = NG(IGRP) XKASH= XKASH+ ALOG(GRPN) 111 CONTINUE XKASH = XKASH + SCH C WRITE(6,1205) AIC WRITE(6,1206) SCH WRITE(6,1111) XKASH C C 1000 FORMAT(2X,I1) 1001 FORMAT(2X,I4) 1003 FORMAT(1X,'CLUSTERING') 1004 FORMAT(1X,'POINT: '/, (1X,40I3)) 1005 FORMAT(1X,'CLUSTER: '/, (1X,40I3)) 1009 FORMAT(1X,'DATA'/) 1010 FORMAT(//1X,'INITIAL MEANS') 1011 FORMAT(1X, 9F13.2//) 1100 FORMAT(1X, 'M.L. ESTIMATE OF COMMON VARIANCE = ',F14.5/) 1111 FORMAT(/,1X,'KASHYAP CRITERION = ', E14.4) 1112 FORMAT(/,1X,'MEAN = ', E14.4) 1113 FORMAT(1X, 'M.L. ESTIMATE OF VARIANCE = ',F14.5/) 1121 FORMAT(//1X,'SSDEVS = ',E14.4,' MINUS 2 LOG LIKELIHOOD = ', XE15.5, ' STDDEV = ',E14.4//) 1012 FORMAT('1','****************************************', A//,1X,'PROGAM ISDT1 '/ B,1X,'FOR CLUSTERING UNIVARIATE DATA (DATA ON THE LINE)'/ C1X,'DEVELOPED AND PROGRAMMED BY DR. STANLEY L. SCLOVE' D/,1X,'VERSION 2.5 9-JUN-87 '//) 1013 FORMAT(1X,'PROGRAM ISOPAC:ISDT1') 1014 FORMAT(1X,'COPYRIGHT 1991 STANLEY LOUIS SCLOVE.'/) 1015 FORMAT('1',1X,'K = ',I1,' CLUSTERS') 1020 FORMAT(1X,'MEANS: ',9F13.2) 1021 FORMAT(//1X,'WGSS = ',E14.4,' MINUS 2 LOG LIKELIHOOD = ', XE15.5, ' WGMS = ',E14.4//) 1035 FORMAT(1X,'BOUNDARIES:', 9X, 9F13.2) 1036 FORMAT(1X,'ITERATION ', I2) 1040 FORMAT(1X,'SUMS:',6X,9F13.2) 1045 FORMAT(1X,'NUMBERS:',3X,9(I10,3X)) 1050 FORMAT(18A4) 1051 FORMAT(1X,'N = ',I3/) 1055 FORMAT(1X,'STD.DEVS.: ',9F13.2) 1060 FORMAT(1X,18A4) 1080 FORMAT(1X,'VARIANCES: ',9F13.2) 1085 FORMAT(1X,'STD.ERROR=SQRT(WGMS) = ',F13.4//) 1090 FORMAT( (8(F6.1,I2)/)) 1095 FORMAT(1X//,1X,'DATA AND LABELS'//) C 1150 FORMAT(/1X,'MIN = ',E14.4,5X,'MAX = ',E14.4/) 1160 FORMAT(1X,'ISDT1 HAS NOT CONVERGED IN 20 ITERATIONS. STOP') 1205 FORMAT(1X,'AIC = ', E15.5//) 1206 FORMAT(1X,'SCHWARZ CRITERION = ', E15.5//) 1210 FORMAT(//1X,'NUMBER OF PARAMETERS = ',I4//) 1235 FORMAT(1X,'NO OBSERVATIONS IN GROUP ',I3,'. STOP') STOP END //GO.SYSIN DD * DUDA AND HART, P. 195 N=0025 (3X,F6.3) 01 +0.608 2 02 -1.590 1 03 +0.235 2 04 +3.949 2 05 -2.249 1 06 +2.704 2 07 -2.473 1 08 +0.672 2 09 +0.262 2 10 +1.072 2 11 -1.773 1 12 +0.537 2 13 +3.240 2 14 +2.400 2 15 -2.499 1 16 +2.608 2 17 -3.458 1 18 +0.257 2 19 +2.569 2 20 +1.415 2 21 +1.410 2 22 -2.653 1 23 +1.396 2 24 +3.286 2 25 -0.712 1 K=2 M1 -1.000 M2 +1.000 /*