C C PROGRAM TSSG1CMA C VERSION 6.2 21-DEC-82 C C C PROGRAM TSSG1CMA (Time-Series SeGmentation, 1-variate data C CoMmon variance, Automatic mode) WAS DEVELOPED FROM PROGRAM C ISDT1 (ISoDaTa, 1-variate data), WHICH CLUSTERS UNIVARIATE DATA. C C C PROGRAMMED BY C DR. STANLEY L. SCLOVE 312/996-2681 C INFORMATION & DECISION SCIENCES DEPT 312/996-2676 C UNIVERSITY OF ILLINOIS AT CHICAGO C 601 S MORGAN ST C CHICAGO, IL 60607-7124 C C C C RESTRICTIONS (CAN BE MODIFIED): C C N, SERIES LENGTH (SAMPLE SIZE), AT MOST 999; C K, NUMBER OF CLASSES OF SEGMENT, AT MOST 9; C ITER, MAXIMUM NUMBER OF ITERATIONS, 20. C C C INPUT "CARDS": C C DATASET NAME, IN FORMAT (18A4) C SERIES LENGTH, N, FORMAT (2X,I3) C DATA FORMAT, FMT, FORMAT (18A4), E.G., (10X,F6.3) C DATA, IN FORMAT SPECIFIED BY FMT C DIMENSION X(999),DIST(9),ICLUS(999),IOTA(999),SUM(9) DIMENSION TITLE(18) DIMENSION NG(9),XMEAN(9) DIMENSION FMT(18) DIMENSION SS(9),SSD(9) DIMENSION SD(9),NT(9,9),IRSUM(9),TP(9,9) DIMENSION VAR(9) DIMENSION ICLSOL(999) DIMENSION P(9) DOUBLE PRECISION SSD,SS,SUM,WGSS C C C READ(5,1050) TITLE READ(5,1001) N XN = N WRITE(6,1012) WRITE(6,1090) TITLE WRITE(6,1051) N C READ(5,1050) FMT C C INITIALIZE VARIABLES; SET CONSTANTS. DO 456 INTEG=1,N IOTA(INTEG) = INTEG 456 CONTINUE PI = 3.1415927 NM1 = N-1 DO 100 I = 1,N READ(5,FMT) X(I) IF (I .EQ. 1) GO TO 498 GO TO 499 498 XMAX = X(1) XMIN = X(1) 499 CONTINUE IF (X(I) .LT. XMIN) XMIN=X(I) IF (X(I).GT.XMAX) XMAX=X(I) 100 CONTINUE WRITE(6,1150) XMIN,XMAX WRITE(6,1009) DO 170 I=1,N WRITE(6,1095) I WRITE(6,FMT) X(I) 170 CONTINUE C DO 800 K = 2,9 C PRECEDING STATEMENT MAY BE ALTERED TO THE FORM "DO 800 K = L,L" C IN ORDER TO MAKE A RUN WITH K FIXED AT L. C COMPUTE INITIAL MEANS. DO 101 IG=1,K XMEAN(IG) = XMIN + IG*(XMAX-XMIN)/(K+1) 101 CONTINUE C IN ORDER TO OVERRIDE THE PRECEDING SPECIFICATION OF INITIAL MEANS, C ONE MAY AT THIS POINT INSERT STATEMENTS OF THE FORM C XMEAN(1) = XX.XX C XMEAN(2) = XX.XXX C . C . C . C XMEAN(K) = XX.XXX C XMEAN(1) = XMIN XMEAN(K) = XMAX WRITE(6,1015) K WRITE(6,1010) WRITE(6,1011)( XMEAN(IG), IG=1,K ) C C FOR FIRST ITERATION, MARGINAL DISTRIBUTION OF LABELS IS TAKEN C TO BE UNIFORM. C DO 145 I = 1,K P(I) = 1.0/K 145 CONTINUE C ITER = 1 C 601 CONTINUE IF (ITER .EQ. 1) GO TO 560 DO 565 I = 1,N ICLSOL(I) = ICLUS(I) 565 CONTINUE 560 CONTINUE DO 104 I = 1,N DO 102 L = 1,K DIST(L) = ( XMEAN(L) - X(I) )**2 IF (ITER .EQ. 1) GO TO 102 C FOR FIRST ITERATION, CLASSIFICATION IS SIMPLY BY MINIMUM C DISTANCE. TEST=-DIST(L)/(2.0*VARHAT) IF (TEST .LT. -180.2) GO TO 615 GO TO 610 615 DIST(L) = 0.0 610 CONTINUE C DIST(L) = EXP(-DIST(L)/(2.0*VARHAT)) 105 CONTINUE IF (I .EQ. 1) GO TO 605 IG = ICLUS(I-1) DIST(L) = -TP(IG,L)*DIST(L) GO TO 102 605 CONTINUE C CLASSIFY FIRST OBSERVATION DIST(L) = -DIST(L)*P(L) 102 CONTINUE F = DIST(1) ICLUS(I) = 1 DO 104 L = 2,K IF ( DIST(L).LT.F ) GO TO 103 GO TO 104 103 F = DIST(L) ICLUS(I) = L 104 CONTINUE C C DO 202 IG = 1,K SUM(IG) = 0.0 SS(IG) = 0.0 NG(IG) = 0 202 CONTINUE C DO 400 I = 1,N ITEMP = ICLUS(I) SUM(ITEMP) = SUM(ITEMP) + X(I) SS(ITEMP) = SS(ITEMP) + X(I)**2 NG(ITEMP) = NG(ITEMP) + 1 400 CONTINUE C WGSS = 0.0 DO 401 IG = 1,K IF (NG(IG) .EQ. 0) GO TO 685 GO TO 690 685 WRITE(6,1105) IG GO TO 800 690 CONTINUE XMEAN(IG) = SUM(IG)/NG(IG) NTEMP=NG(IG) IF (NTEMP .EQ. 1) GO TO 135 SSD(IG) = SS(IG) - SUM(IG)**2/NG(IG) VAR(IG) = SSD(IG)/(NG(IG)-1) SD(IG) = SQRT(VAR(IG)) GO TO 140 135 SSD(IG)=0.0 VAR(IG) = 0.0 SD(IG)=0.0 140 CONTINUE WGSS = WGSS + SSD(IG) C 401 CONTINUE C C WGMS = WGSS/(N-K) STDERR = SQRT(WGMS) C IF (ITER .EQ. 1) GO TO 600 DO 555 I = 1,N IF (ICLUS(I) .EQ. ICLSOL(I)) GO TO 555 GO TO 600 555 CONTINUE GO TO 530 600 CONTINUE C C C C C DO 520 I = 1,K DO 520 J = 1,K NT(I,J) = 0 520 CONTINUE DO 110 I = 1,NM1 IOLD = ICLUS(I) IP1 = I + 1 INEW = ICLUS(IP1) NT(IOLD,INEW) = NT(IOLD,INEW) + 1 110 CONTINUE DO 150 I = 1,K IRSUM(I) = 0 150 CONTINUE DO 120 I=1,K DO 120 J = 1,K IRSUM(I) = IRSUM(I) + NT(I,J) 120 CONTINUE DO 125 I=1,K XDENOM=IRSUM(I) IF (XDENOM .EQ. 0.0) GO TO 585 GO TO 590 585 WRITE(6,1105) I GO TO 800 590 CONTINUE C DO 125 J = 1,K XNUM=NT(I,J) TP(I,J) = XNUM/XDENOM 125 CONTINUE 126 CONTINUE C C COMPUTE MARGINAL DISTRIBUTION OF LABELS. DO 165 IG = 1,K P(IG) = IRSUM(IG) P(IG) = P(IG)/N 165 CONTINUE C C TRANS=0.0 DO 515 I=1,K DO 515 J=1,K ITEST = NT(I,J) IF (ITEST .EQ. 0) GO TO 515 IF (TP(I,J) .EQ. 0.) GO TO 515 TRANS = TRANS + NT(I,J)*ALOG(TP(I,J)) 515 CONTINUE TRANS = -2.0*TRANS C ACCOUNT FOR FIRST OBSERVATION'S LABEL. LABEL1 = ICLUS(1) PROB = P(LABEL1) FIRST = -2.0*ALOG(PROB) NOPARM = K + 1 + K*(K-1) WRITE(6,1035) ITER C EXCEPT FOR FIRST ITERATION, COMPUTE AIC BASED ON C NEW CLUSTERING AND OLD VARIANCE. IF (ITER .EQ. 1) GO TO 575 XMN2LL = N + N*ALOG(2.0*PI*VARHAT) AICOLD = XMN2LL + 2.0*NOPARM SCHOLD = XMN2LL + ALOG(XN)*NOPARM C C XKASHOLD = 0.0 DO 111 IGRP=1,K GRPN = NG(IGRP) XKASHOLD= XKASHOLD+ ALOG(GRPN) 111 CONTINUE XKASHOLD= XKASHOLD + SCHOLD C 575 CONTINUE WRITE(6,1004) (IOTA(I), I=1,N) WRITE(6,1005) (ICLUS(I), I=1,N) IF (ITER .EQ. 1) GO TO 580 WRITE(6,1160) AICOLD WRITE(6,1165) SCHOLD WRITE(6,1111) XKASHOLD 580 CONTINUE C VARHAT IS MLE OF VARIANCE. VARHAT = WGSS/N C COMPUTE AIC BASED ON NEW CLUSTERING AND NEW VARIANCE. XMN2LL = N + N*ALOG(2.0*PI*VARHAT) XMN2LL = XMN2LL + TRANS + FIRST AIC = XMN2LL + 2.0*NOPARM SCH = XMN2LL + ALOG(XN)*NOPARM C XKASH = 0.0 DO 211 IGRP=1,K GRPN = NG(IGRP) C C XKASH = XKASH + ALOG(GRPN) 211 CONTINUE XKASH = XKASH + SCH WRITE(6,1025) WGSS, XMN2LL, WGMS WRITE(6,1085) STDERR WRITE(6,1020) (XMEAN(IG),IG=1,K) WRITE(6,1040) (SUM(IG),IG=1,K) WRITE(6,1045) (NG(IG),IG=1,K) WRITE(6,1080) (VAR(IG),IG=1,K) WRITE(6,1055) (SD(IG), IG=1,K) WRITE(6,1100) VARHAT WRITE(6,1065) DO 115 I = 1,K WRITE(6,1060) (NT(I,J),J=1,K) 115 CONTINUE WRITE(6,1070) DO 130 I=1,K WRITE(6,1075) (TP(I,J),J=1,K) 130 CONTINUE WRITE(6,1255) (P(I),I=1,K) WRITE(6,1200) TRANS WRITE(6,1210) NOPARM WRITE(6,1205) AIC WRITE(6,1206) SCH WRITE(6,1211) XKASH ITER = ITER + 1 IF (ITER.GE.21) GO TO 495 GO TO 601 495 WRITE(6,1215) GO TO 800 C C 1001 FORMAT(2X,I3) 1004 FORMAT(1X,'POINT: '/, (1X,40I3)) 1005 FORMAT(1X,'CLUSTER: '/, (1X,40I3)) 1009 FORMAT(1X,'DATA'/ (1X, 5(2X,I2,F6.2))) 1010 FORMAT(/1X,'INITIAL MEANS') 1011 FORMAT(1X, 9F13.2/) C000000000111111111122222222223333333333444444444455555555556666666666777 C123456789012345678901234567890123456789012345678901234567890123456789012 1012 FORMAT('1',1X,'===============================================', A'======================================================', B/,1X,'ISOPAC / TSPAC / TSSG1CMA'/ C1X,'DEVELOPED AND PROGRAMMED BY DR. STANLEY L. SCLOVE' D/,1X,'VERSION 6.2 21-DEC-82'//) 1015 FORMAT('1',1X,'K = ',I1,' CLASSES OF SEGMENT'/) 1020 FORMAT(1X,'MEANS: ',9F13.2) 1025 FORMAT(/1X,'WGSS = ',F15.4,' MINUS 2 LOG LIKELIHOOD = ', XF16.5, ' WGMS = ',F15.4/) 1035 FORMAT(//,1X,'ITERATION ', I2) 1040 FORMAT(1X,'SUMS:',6X,9F13.2) 1045 FORMAT(1X,'NUMBERS:',3X,9(I10,3X)) 1050 FORMAT(18A4) 1051 FORMAT(1X,'N = ',I3/) 1055 FORMAT(1X,'STD.DEVS.: ',9F13.2) 1060 FORMAT(1X,9I7) 1065 FORMAT(/1X,'TRANSITIONS'/) 1070 FORMAT(/1X,'TRANSITION PROBABILITIES'/) 1075 FORMAT(1X,9F7.4) 1080 FORMAT(1X,'VARIANCES: ',9F13.2) 1085 FORMAT(1X,'STD.ERROR=SQRT(WGMS) = ',F13.4/) 1090 FORMAT(1X,18A4) 1095 FORMAT(1X,I3) 1100 FORMAT(/,1X, 'M.L. ESTIMATE OF COMMON VARIANCE = ',F14.5/) 1105 FORMAT(/,1X,'PROGRAM STOPPED BECAUSE OF NO OBSERVATIONS', A' IN CLASS ',I2/) 1150 FORMAT(1X,'MIN = ',F15.4,5X,'MAX = ',F15.4/) 1160 FORMAT(/,1X,'AIC = ', F14.4, A1X,'(FROM NEW CLUSTERING AND VARIANCE THAT PRODUCED IT)') 1165 FORMAT(/,1X,'SCHWARZ CRITERION = ', E14.4, A1X,'(FROM NEW CLUSTERING AND VARIANCE THAT PRODUCED IT)') 1111 FORMAT(/,1X,'KASHYAP CRITERION = ', E14.4, A1X,'(FROM NEW CLUSTERING AND VARIANCE THAT PRODUCED IT)') 1200 FORMAT(//,1X,'CONTRIBUTION OF TRANS. PROBS. TO ', X'LOG LIKELIHOOD = ',E14.4/) 1205 FORMAT(1X,'AIC = ', E14.4, A1X, '(FROM NEW CLUSTERING AND VARIANCE IT PRODUCED)'/) 1206 FORMAT(1X,'SCHWARZ CRITERION = ', E14.4, A1X, '(FROM NEW CLUSTERING AND VARIANCE IT PRODUCED)'/) 1211 FORMAT(1X,'KASHYAP CRITERION = ', E14.4, A1X, '(FROM NEW CLUSTERING AND VARIANCE IT PRODUCED)'/) 1210 FORMAT(/1X,'NUMBER OF PARAMETERS = ',I4/) 1215 FORMAT(1X,'STOPPED: ALGORITHM HAS NOT CONVERGED', A' IN 20 ITERATIONS.') 1220 FORMAT(1X,'RESULTS DID NOT CHANGE IN ITERATION NUMBER',I3,'.', A/,1X, 'PROGRAM RAN ', I2,' SEGMENTS SUCCESSFULLY.'/) 1255 FORMAT(/,1X,'MARGINAL PROB. VECTOR:',9F11.4) 530 CONTINUE WRITE(6,1220) ITER, K 800 CONTINUE STOP END