5 COMMON X(), S1(), S2(), R(), P1(), P2(), P3(), F1(), F2(), CHARST(), RANGE() 10 CLS 20 PRINT " STATISTICAL ECOLOGY: A PRIMER ON METHODS AND COMPUTING " 30 PRINT " " 40 PRINT " I N T E R A C T I V E B A S I C P R O G R A M " 50 PRINT 60 PRINT " C L U S T E R . B A S " 70 PRINT " --------------------------------------------------------------" 80 PRINT " THIS PROGRAM COMPUTES: " 90 PRINT 100 PRINT " A CLUSTER ANALYSIS of N sampling units (SUs) based on " 110 PRINT " abundance data of S species. " 115 PRINT " --------------------------------------------------------------" 130 PRINT " The program includes the following options: " 140 PRINT 150 PRINT " 1. CLUSTER ALGORITHM OR STRATEGIES " 160 PRINT " (Centroid, Median, Group Average and Flexible) " 170 PRINT 180 PRINT " 2. Q-MODE Measure of SU RESEMBLANCE " 190 PRINT " (10 Distance Indices - Chap. 14) " 200 PRINT " --------------------------------------------------------------" 210 PRINT 220 PRINT " " 230 PRINT : PRINT "HIT ANY KEY TO CONTINUE" 240 IF INKEY$ = "" THEN 240 250 CLS 260 COUNTER = 0 270 PRINT "--------------PART I. SPECIFICATION OF OPTIONS------------" 280 PRINT 290 PRINT " Option #1: Q-Mode Distance Indices: " 300 PRINT " 1) E-GROUP DISTANCES " 310 PRINT " 1= Euclidean Distance (ED) [Eq. 14.1] " 320 PRINT " 2= Squared Euclidean Distance (SED) [Eq. 14.2] " 330 PRINT " 3= Mean Euclidean Distance (MED) [Eq. 14.3] " 340 PRINT " 4= Absolute Distance (AD) [Eq. 14.4] " 350 PRINT " 5= Mean Absolute Distance (MAD) [Eq. 14.5] " 360 PRINT " 2) BS-GROUP DISTANCES " 370 PRINT " 6= Percent Dissimilarity (PD) [Eq. 14.6c] " 380 PRINT " 3) RE-GROUP DISTANCES " 390 PRINT " 7= Relative Euclidean Distance (RED)[Eq. 14.7] " 400 PRINT " 8= Relative Absolute Distance (RAD) [Eq. 14.8] " 410 PRINT " 9= Chord Distance (CRD) [Eq. 14.9a] " 420 PRINT " 10= Geodesic Distance (GDD) [Eq. 14.10] " 430 INPUT "ENTER choice of DISTANCE index (1-10): ", C1 440 PRINT " " 450 PRINT "Option #2: CLUSTERING STRATEGIES: " 460 PRINT " 1 = CENTROID (unweighted) PAIR-GROUP method " 470 PRINT " 2 = CENTROID (weighted) / MEDIAN PAIR-GROUP method) " 480 PRINT " 3 = GROUP AVERAGE (unweighted PAIR-GROUP method) " 490 PRINT " 4 = FLEXIBLE (weighted PAIR-GROUP variable BETA method) " 500 INPUT " ENTER choice of STRATEGY (1-4): ", C2 510 IF C2 = 4 THEN INPUT " Flexible Strategy -- ENTER Value of BETA....", B 520 CLS 530 CYCLE = CYCLE + 1 540 'IF CYCLE=1 THEN GOSUB 2670 550 IF CYCLE < 2 THEN GOSUB 2680 ELSE FOR I = 1 TO N: P1(I) = I: P2(I) = 1: F1(I) = I: NEXT I 560 CLS 570 REM - LOOPS FOR SUMS AND SUMS OF SQS. FOR RELATIVE DISTANCES 580 REM - IF DISTANCE OPTIONS 8-10 ARE SPECIFIED. 590 IF C1 < 6 THEN GOTO 700 600 FOR I = 1 TO N 610 S1(I) = 0 620 S2(I) = 0 630 FOR J = 1 TO K 640 S1(I) = S1(I) + X(I, J) 650 S2(I) = S2(I) + (X(I, J) * X(I, J)) 660 NEXT J 670 NEXT I 680 REM - LOOPS TO CALCULATE DISTANCES BETWEEN ALL PAIR-WISE 690 REM - COMBINATIONS OF SAMPLING UNITS 700 PRINT "-------PART II. PAIR-WISE DISTANCES BETWEEN SUs ------------": PRINT 710 GOSUB 3270 720 M = N - 1 730 M1 = 1 740 FOR I = 1 TO M 750 L1 = M1 760 FOR N1 = I TO M 770 L = N1 + 1 780 D = 0 790 'REM - EUCLIDEAN DISTANCES: STANDARD, AVERAGE AND SQUARED 800 IF C1 > 3 THEN GOTO 890 810 FOR J = 1 TO K 820 G = X(I, J) - X(L, J) 830 D = D + (G * G): 'SED - EQ. 14.2 840 NEXT J 850 IF C1 = 1 THEN D = SQR(D): 'ED - EQ. 14.1 860 IF C1 = 3 THEN D = SQR(D / K): 'MED - EQ. 14.3 870 GOTO 1220 880 'REM - ABSOLUTE DISTANCES: STANDARD AND AVERAGE 890 IF C1 > 6 THEN GOTO 960 900 FOR J = 1 TO K 910 D = D + ABS(X(I, J) - X(L, J)): 'AD - EQ. 14.4 920 NEXT J 930 IF C1 = 5 THEN D = D / K: 'MAD - EQ. 14.5 940 IF C1 = 6 THEN D = D / (S1(I) + S1(L)): 'PD - EQ. 14.6C 950 GOTO 1220 960 IF C1 <> 8 THEN GOTO 1010 970 FOR J = 1 TO K 980 D = D + ABS((X(I, J) / S1(I)) - (X(L, J) / S1(L))): 'RAD - EQ. 14.8 990 NEXT J 1000 GOTO 1220 1010 IF C1 > 7 THEN GOTO 1080 1020 FOR J = 1 TO K 1030 G = (X(I, J) / S1(I)) - (X(L, J) / S1(L)) 1040 D = D + (G * G) 1050 NEXT J 1060 D = SQR(D): 'RED - EQ. 14.7 1070 GOTO 1220 1080 S3 = 0 1090 FOR J = 1 TO K 1100 S3 = S3 + (X(I, J) * X(L, J)) 1110 NEXT J 1120 S4 = S3 / SQR(S2(I) * S2(L)) 1130 D = SQR(2 * (1 - S4)): 'CRD - EQ. 14.9A 1140 IF C1 = 9 THEN GOTO 1220 1150 'REM - GEODESIC METRIC CALCULATIONS 1160 IF S4 < .01 THEN S4 = .01 1170 'REM - CHORD SINE AND TANGENT 1180 S5 = SQR(1 - (S4 * S4)) 1190 S6 = S5 / S4 1200 IF S6 > 1000 THEN S6 = 1000 1210 D = ATN(S6): 'GDD - EQ. 14.10 1220 REM place computed values into array 1230 R(M1) = D 1240 M1 = M1 + 1 1250 NEXT N1 1260 PRINT " SU"; P1(I); ")"; 1270 PRINT " PAIRED DISTANCES with other SUs" 1280 N2 = M1 - 1 1290 XXX = I 1300 FOR J = L1 TO N2 1310 XXX = XXX + 1 1320 PRINT P1(XXX); ")"; 1330 PRINT USING "######.## "; R(J); 1340 NEXT J 1350 PRINT "" 1360 COUNTER = COUNTER + 1 1370 IF COUNTER < 5 THEN GOTO 1410 1380 PRINT : PRINT "HIT ANY KEY TO CONTINUE" 1390 IF INKEY$ = "" THEN 1390 1400 COUNTER = 0 1410 NEXT I 1420 PRINT " " 1430 PRINT : PRINT "HIT ANY KEY TO CONTINUE" 1440 IF INKEY$ = "" THEN 1440 1450 CLS 1460 REM - OUTPUT PROGRAM HIERARCHIAL CLUSTERING STRATEGY OPTION 1470 PRINT "------------PART III. CLUSTER ANALYSIS OUTPUT --------------------" 1480 PRINT 1490 IF C2 = 1 THEN PRINT "CLUSTERING is by the CENTROID (Unweighted) STRATEGY" 1500 IF C2 = 2 THEN PRINT "CLUSTERING is by the CENTROID (Weighted)/ MEDIAN STRATEGY" 1510 IF C2 = 3 THEN PRINT "CLUSTERING is by the GROUP-AVERAGE STRATEGY" 1520 IF C2 = 4 THEN PRINT "CLUSTERING is by the FLEXIBLE STRATEGY " 1530 IF C2 = 4 THEN PRINT " with BETA = "; B 1540 GOSUB 3270 1550 PRINT " " 1560 REM - SET THE PARAMETERS FOR THE STRATEGY OPTION 1570 ON C2 GOTO 1600, 1650, 1700, 1750 1580 REM - CENTROID STRATEGY PARAMETERS - INITIALIZED HERE,BUT VARY 1590 REM - DURING THE CLUSTERING CYCLES 1600 A1 = .5 1610 A2 = .5 1620 B = -.25 1630 GOTO 1800 1640 REM - MEDIAN STRATEGY PARAMETERS - CONSTANT FOR ALL CYCLES 1650 A1 = .5 1660 A2 = .5 1670 B = -.25 1680 GOTO 1800 1690 REM - GROUP-AVERAGE STRATEGY PARAMETERS - INITIALIZED HERE, BUT 1700 A1 = .5 1710 A2 = .5 1720 B = 0 1730 GOTO 1800 1740 REM - FLEXIBLE STRATEGY PARAMETERS - DEPEND ON USER INPUT OF BETA 1750 A1 = .5 + ABS(B / 2) 1760 A2 = A1 1770 REM - TO FIND THE MINIMUM DISTANCE BETWEEN OTUS, OR GROUPS 1780 REM - AND OUTPUT OF GROUPS,CLUSTERING LEVEL AND REFERENCE OTU 1790 REM - AND FOR REDUCING THE DISTANCE MATRIX FOR THE GROUPED OTU,S 1800 PRINT "Cluster No. Cluster Ref. SUs " 1810 PRINT "CYCLE GRPS LEVEL S.U. in the Group" 1820 PRINT "----- ---- ------ ------- ------------------------------------" 1830 N5 = 0 1840 M = N - 1 1850 REM - LOOP TO FIND THE MINIMUM DISTANCE 1860 FOR L5 = 1 TO M 1870 N5 = N5 + 1 1880 N6 = N - N5 1890 R2 = 999999! 1900 M1 = 1 1910 FOR I = 1 TO M 1920 FOR N1 = I TO M 1930 L = N1 + 1 1940 IF R(M1) < R2 THEN GOTO 1960 1950 GOTO 2000 1960 H1 = I 1970 H2 = L 1980 F3 = M1 1990 R2 = R(F3) 2000 M1 = M1 + 1 2010 NEXT N1 2020 NEXT I 2030 REM - CALCULATE VARYING CENTROID AND GROUP-AVERAGE STRATEGY PARAM 2040 ON C2 GOTO 2060, 2140, 2110, 2140 2050 REM - CENTROID PARAMETERS 2060 A1 = P2(H1) / (P2(H1) + P2(H2)) 2070 A2 = P2(H2) / (P2(H1) + P2(H2)) 2080 B = -A1 * A2 2090 GOTO 2140 2100 REM - GROUP-AVERAGE PARAMETERS 2110 A1 = P2(H1) / (P2(H1) + P2(H2)) 2120 A2 = P2(H2) / (P2(H1) + P2(H2)) 2130 REM - LOOP TO FIND THE INDICES OF THE OTU,S IN EACH GROUP 2140 N2 = 0 2150 FOR I = 1 TO N 2160 IF F1(I) = H1 OR F1(I) = H2 THEN GOTO 2180 2170 GOTO 2210 2180 N2 = N2 + 1 2190 P3(N2) = P1(I) 2200 F1(I) = H1 2210 NEXT I 2220 P2(H1) = N2 2230 REM - OUTPUT OF GROUPS, CLUSTERING LEVELS & OTU,S IN EACH GROUP 2240 PRINT USING "####"; N5; 2250 PRINT USING "######"; N6; 2260 PRINT USING "#####.##"; R2; 2270 PRINT USING "#######"; P1(H1); 2280 FOR I = 2 TO N2 2290 PRINT USING "#####"; P3(I); 2300 NEXT I 2310 PRINT " " 2320 REM - LOOP TO REDUCE THE MATRIX (SET REDUCED ROW & COL TO 99999) 2330 REM - THE NEW REFERENCE OTU IS THE ONE WITH THE SMALLEST INDEX 2340 R3 = B * R(F3) 2350 M3 = 1 2360 M1 = 1 2370 FOR I = 1 TO M 2380 M2 = 1 2390 FOR N1 = I TO M 2400 L = N1 + 1 2410 IF I = H1 THEN GOTO 2530 2420 IF (L = H1) THEN F2(M3) = M1 2430 IF I = H2 OR L = H2 THEN GOTO 2450 2440 GOTO 2560 2450 IF I > H1 THEN M3 = M3 + 1 2460 N3 = F2(M3) 2470 REM - THE FOLLOWING UPDATE EQUATION IS USED TO FIND THE NEW 2480 REM - DISTANCES BETWEEN INDIVIDUALS AND/OR GROUPS WITHOUT 2490 REM - RECALCULATING NEW DISTANCES FROM THE ORIGINAL DATA MATRIX 2500 R(N3) = ((A1 * R(N3)) + (A2 * R(M1))) + R3 2510 R(M1) = 999999! 2520 GOTO 2560 2530 IF (L = H2) THEN GOTO 2510 2540 M2 = M2 + 1 2550 F2(M2) = M1 2560 M1 = M1 + 1 2570 NEXT N1 2580 NEXT I 2590 NEXT L5 2600 PRINT USING "####"; N5; 2610 PRINT USING "######"; N6; 2620 PRINT USING "#####.##"; R2; 2630 PRINT USING "#######"; P1(H1); 2640 PRINT " ALL SU's FORM ONE GROUP " 2650 PRINT : PRINT 2660 INPUT "Run SAME dataset but with different OPTIONS (ANSWER Y/N)"; ANS$ 2670 IF ANS$ = "Y" OR ANS$ = "y" THEN GOTO 250 ELSE END 2680 REM -----------SUBROUTINE FOR DATA ENTRY--------------------------- 2690 PRINT "Data INPUT options: " 2700 PRINT " Option 1 - Data already exists in a STORED data file " 2710 PRINT " Option 2 - Data is to be manually entered from keyboard " 2720 PRINT " (and subsequently STORED, if desired) " 2730 PRINT 2740 INPUT "Choose OPTION: Enter 1 OR 2 "; OPT 2750 PRINT "" 2760 INPUT "ENTER the NUMBER of SAMPLING UNITS (N) ? ", N 2770 INPUT "ENTER the NUMBER of SPECIES (S) ? ", K 2780 NNN = (N * (N - 1)) / 2 2790 DIM X(N, K), S1(N), S2(N), R(NNN), P1(N), P2(N), P3(N) 2800 DIM F1(N), F2(N), CHARST(K), RANGE(K) 2810 IF OPT = 1 THEN GOTO 3030 ELSE IF OPT <> 2 THEN GOTO 2740 2820 PRINT 2830 PRINT "The data matrix is created one COLUMN (or SU) at a time. " 2840 PRINT 2850 FOR SU = 1 TO N 2860 P1(SU) = SU 2870 FOR SP = 1 TO K 2880 PRINT "SPECIES "; SP; " in SU "; P1(SU) 2890 INPUT ".."; X(SU, SP) 2900 P2(SU) = 1: F1(SU) = SU 2910 NEXT SP: NEXT SU 2920 INPUT "STORE THIS DATASET ON DISK ? (ENTER Y for YES / N for NO) "; A$ 2930 IF A$ = "N" OR A$ = "n" THEN GOTO 3120 ELSE GOTO 2940 2940 INPUT "Specify a name for this DATASET (e.g., SAMPLE.DAT) "; OUTDAT$ 2950 INPUT "Specify DISK drive: ENTER A, B, C, etc. "; DD$ 2960 OUTDAT$ = DD$ + ":" + OUTDAT$ 2970 OPEN OUTDAT$ FOR OUTPUT AS #1 2980 FOR SU = 1 TO N 2990 FOR SP = 1 TO K 3000 WRITE #1, X(SU, SP) 3010 NEXT SP: NEXT SU 3020 GOTO 3120 3030 INPUT "Specify name of DATA File (e.g., SAMPLE.DAT) "; DATAFIL$ 3040 INPUT "Specify DISK DRIVE where located: A, B, C, etc. "; DD$ 3050 FILENAM$ = DD$ + ":" + DATAFIL$ 3060 OPEN "I", #1, FILENAM$ 3070 FOR SU = 1 TO N 3080 P2(SU) = 1: F1(SU) = SU: P1(SU) = SU 3090 FOR SP = 1 TO K 3100 INPUT #1, X(SU, SP) 3110 NEXT SP: NEXT SU 3120 INPUT "Would you like to list the DATA ? (Y for Yes / N for NO) "; A$ 3130 IF A$ = "Y" OR A$ = "y" THEN GOTO 3140 ELSE GOTO 3250 3140 PRINT "LISTING OF THE DATA SET. ROW=SUs, COL=Species": PRINT 3160 FOR SP = 1 TO K 3180 FOR SU = 1 TO N 3190 PRINT X(SU, SP); 3200 NEXT SU: PRINT 3210 NEXT SP: PRINT 3230 PRINT "PRESS ANY KEY TO CONTINUE" 3240 IF INKEY$ = "" THEN 3240 3250 CLOSE #1 3260 RETURN 3270 IF C1 = 1 THEN PRINT "Index is Euclidean Distance (ED) [Eq. 14.1] " 3280 IF C1 = 2 THEN PRINT "Index is Squared Euclidean Distance (SED) [Eq. 14.2]" 3290 IF C1 = 3 THEN PRINT "Index is Mean Euclidean Distance (MED) [Eq. 14.3] " 3300 IF C1 = 4 THEN PRINT "Index is Absolute Distance (AD) [Eq. 14.4] " 3310 IF C1 = 5 THEN PRINT "Index is Mean Absolute Distance (MAD) [Eq. 14.5] " 3320 IF C1 = 6 THEN PRINT "Index is Percent Dissimilarity (PD) [Eq. 14.6c] " 3330 IF C1 = 7 THEN PRINT "Index is Relative Euclidean Distance (RED) [Eq. 14.7]" 3340 IF C1 = 8 THEN PRINT "Index is Relative Absolute Distance (RAD) [Eq. 14.8] " 3350 IF C1 = 9 THEN PRINT "Index is Chord Distance (CRD) [Eq. 14.9a] " 3360 IF C1 = 10 THEN PRINT "Index is Geodesic Distance (GDD) [Eq. 14.10] " 3370 PRINT 3380 RETURN