#-------------------------------------------------------------------# # This program is to visualize clustering structures interactively # using MDS and MST. The hierarchical clustering methods used are # "Single, Complete, Average, Centroid, Median and Ward" linkages. # The roles of this program : # 1) Interactive visualization of cluster structures # 2) Visualize individual observations in each clustering methods # 3) Compare clustering methods pairwise. # The paper related to this program is "Interactive Visualization of # Hierarchical Clusters Using MDS and MST" # - Sungsoo Kim, Dianne Cook, Sunhee Kwon - # # S-plus Ver. 3.2 (For Unix) # Last Update : 6-1-99 #---------------------------------------------------------------------# MstTree <- function( n, distance) { # ---------------------------------------------------------------# # This function is made by Sungsoo Kim using the algorithm of # Prim(1957) to make the minimal spanning tree # Input - n : number of data # distance : distance vector # output- mst : minimal spanning obs. corresponding to obs. 2-n # dist: corresponding distance # node: number of nodes of obs. 1-n # ---------------------------------------------------------------# MD <- distance dlarge <- max(MD) + 1 MA <- array( 0, n) MB <- MA MC <- MA for( i in 2:n ) { MA[i] <- 0 MB[i] <- 0 MC[i] <- dlarge } j <- 1 for( i in 2:n ) { minimum <- dlarge for( k in 2:n ) { if( MA[k] == 0 ) { if( j > k ) kk <- n*(k-1) - k*(k-1)/2 + j-k else kk <- n*(j-1) - j*(j-1)/2 + k-j length <- MD[kk] if( length < MC[k] ) { MC[k] <- length MB[k] <- j } if( minimum > MC[k] ) { minimum <- MC[k] nextValue <- k } } } j <- nextValue MA[j] <- 1 } # This Routine is to decide the Degree of node(observations) MN <- array(1, n) MN[1] <- 0 # MN[2:n] <- 1 for(i in 2:n) MN[MB[i]] <- MN[MB[i]] + 1 mst <- MB[2:n] dist <- MC[2:n] node <- MN[1:n] result <- list(mst=mst, dist=dist, node=node) result } mdsCalculate <- function(distance) { # ---------------------------------------------------------------# # function to calculate the coordinates of 2-dim classical MDS # input - distance : distance vector # output- x,y : coordinates of (x-y) of MDS # ---------------------------------------------------------------# md <- cmdscale( distance, eig=T) mdpoint <- md$points mdeig <- md$eig x <- mdpoint[,1] y <- mdpoint[,2] result <- list(x=x, y=y) result } mdsPlotRem <- function(x,y,mst,labName, tname) { # ---------------------------------------------------------------# # function to plot the MDS superimposed by MST # input - x,y : coordinate of MDS # mst : the results of MST # labName : label of obs. # tname : title name # output - none # ---------------------------------------------------------------# rx <- range(x) ry <- range(y) par( lty=1, col=5, pty="s" ) plot(x,y, type="n", xlim=rx, ylim=ry, xlab="", ylab="") abline(h=0,v=0, lty=2) text(x,y, labName ) segments( x[seq(mst)+1], y[seq(mst)+1], x[mst], y[mst]) title(tname) } SelectObsPlot <- function(x,y,mst,labName, tname) { # ---------------------------------------------------------------# # function to select obs's to remove # ---------------------------------------------------------------# rx <- range(x) ry <- range(y) par(mfrow=c(1,1) ) par( lty=1, col=5, pty="s" ) plot(x,y, type="n", xlim=rx, ylim=ry, xlab="", ylab="") abline(h=0,v=0, lty=2) text(x,y, labName ) segments( x[seq(mst)+1], y[seq(mst)+1], x[mst], y[mst]) title(main=tname) NoR <- identify(x,y,plot=FALSE) lenR <- length(NoR) result <- list(NoR = NoR, lenR = lenR) result } mdsPlot2Dim <- function(x,y,mst,labName) { # ---------------------------------------------------------------# # function to plot the MDS superimposed by MST and select rotation # input - x,y : coor. of MDS # mst : the results of MST # labName : label of obs. # output - x,y : modified coor. of MDS # ---------------------------------------------------------------# rx <- range(x) ry <- range(y) par( lty=1, col=5, pty="s" ) plot(x,y, type="n", xlim=rx, ylim=ry, xlab="", ylab="") abline(h=0,v=0, lty=2) text(x,y, labName ) segments( x[seq(mst)+1], y[seq(mst)+1], x[mst], y[mst]) title(" MDS with MST") cat("\n --------------------------------------") cat("\n Want to Rotate (N/y) ? : ") rotateAxis <- readline() if(rotateAxis == "Y" || rotateAxis == "y") { cat("\n ------------------------------------") cat("\n Rotate about Y-axis : 1 ") cat("\n Rotate about X-axis : 2 ") cat("\n Rotate about Origin : 3 ") cat("\n Exchange (X-Y) : 4 ") cat("\n ------------------------------------") cat("\n Type the number : ") rotateSelect <- readline() rotateSelect <- as.integer(rotateSelect) if(rotateSelect == 1) x <- -x else if(rotateSelect == 2) y <- -y else if(rotateSelect == 3) { x <- -x ; y <- -y } else if(rotateSelect == 4) { imsi.x <- x ; x <- y ; y <- imsi.x } mdsPlot2Dim(x,y, mst, labName) } result <- list(x=x, y=y) result } CompareclstPlotting <- function(x,y,Nr, mst,node,mdist,distMF,clsMat, mstF, nodeF,mdistF,clsMatF,labN,name) { # ---------------------------------------------------------------# # function to call plot of clusters w/wo removing observations # input - x,y : coor. of MDS # Nr : removing obs. # mst : mst in case of removing obs. # node: node in case of removing obs. # mdist: mst distance in case of removing obs. # distMF: distance Matrix for full obs. # clsMat: cluster Matrix in case of removing obs. # mstF : mst in case of Full obs. # nodeF: node in case of Full obs. # mdistF: distance in case of Full obs. # clsMatF: cluster Matrix in case of Full obs. # labN : label of obs. # name : type of cluster # output - None # ---------------------------------------------------------------# xr <- x[-Nr] yr <- y[-Nr] labName <- labN[-Nr] distM <- distMF[-Nr, -Nr] n <- length(x) - 2 cat("\n\n No. of Clst(2-",n,")(To stop, only RETURN): ", sep="") numClst <- readline() if(numClst == "") break numC <- as.integer(numClst) numC <- numC - 1 No <- paste(Nr, collapse=",") t.name <- paste(name,"-link", sep="") tr.name <- paste(t.name, ": Removed obs.", No, sep="") clstSubPlotting(x,y,distMF,mstF, nodeF, mdistF, clsMatF,numC, labN, name, t.name) clstSubPlotting(xr,yr,distM,mst, node, mdist, clsMat,numC, labName, name, tr.name) CompareclstPlotting(x,y,Nr, mst,node,mdist,distMF,clsMat, mstF, nodeF,mdistF,clsMatF,labN,name) } CompareMethodPlotting <- function(x,y,distM,labName,mst,node,mdist, clsMat1,clsMat2,NR=0,name1,name2) { # ---------------------------------------------------------------# # function to call plot the two types of clusters w/wo removing # observations # input - x,y : coor. of MDS # distM: distance Matrix for obs. # labName : label of obs. # mst : mst for obs. # node: node for mst # mdist: distance for mst # clsMat1: cluster Matrix for the first case # clsMat2: cluster Matrix for the second case # NR : removing obs.(default : full obs.) # name1 : first cluster # name2 : second cluster # output - None # ---------------------------------------------------------------# n <- length(x) - 1 cat("\n\n No. of Clst(2-",n,")(To stop, only RETURN): ", sep="") numClst <- readline() if(numClst == "") break numC <- as.integer(numClst) numC <- numC - 1 if( NR == 0 ) { t1.name <- paste(name1,"-link", sep="") t2.name <- paste(name2,"-link", sep="") } else { NRP <- paste(NR, collapse=",") t1.name <- paste(name1,"-link: Removed obs. ",NRP,sep="") t2.name <- paste(name2,"-link: Removed obs. ",NRP,sep="") } par(mfrow=c(1,2)) clstSubPlotting(x,y,distM,mst, node, mdist, clsMat1,numC, labName, name1, t1.name) clstSubPlotting(x,y,distM,mst, node, mdist, clsMat2,numC, labName, name2, t2.name) CompareMethodPlotting(x,y,distM,labName,mst,node,mdist, clsMat1,clsMat2,NR,name1,name2) } CompareMethodPlottingRemoving <- function(xF,yF,distMF,labNF,mstF,nodeF, mdistF,cM1F,cM2F,distM,labN,mst,node,mdist, cM1,cM2, No.R, name1,name2) { x <- xF[-No.R] y <- yF[-No.R] n <- length(x) - 1 cat("\n\n No. of Clst(2-",n,")(To stop, only RETURN): ", sep="") numClst <- readline() if(numClst == "") break numC <- as.integer(numClst) numC <- numC - 1 t1F.name <- paste(name1,"-link", sep="") t2F.name <- paste(name2,"-link", sep="") NRP <- paste(No.R, collapse=",") t1.name <- paste(name1,"-link: Removed obs. ",NRP,sep="") t2.name <- paste(name2,"-link: Removed obs. ",NRP,sep="") par(mfrow=c(2,2)) clstSubPlotting(xF,yF,distMF,mstF, nodeF, mdistF, cM1F,numC, labNF, name1, t1F.name) clstSubPlotting(xF,yF,distMF,mstF, nodeF, mdistF, cM2F,numC, labNF, name2, t2F.name) clstSubPlotting(x,y,distM,mst, node, mdist, cM1,numC, labN, name1, t1.name) clstSubPlotting(x,y,distM,mst, node, mdist, cM2,numC, labN, name2, t2.name) CompareMethodPlottingRemoving(xF,yF,distMF,labNF,mstF,nodeF,mdistF, cM1F,cM2F, distM,labN,mst,node,mdist, cM1,cM2, No.R, name1,name2) } clstPlotting <- function(x,y, distM,mst, node, mdist, clsMat, labName, name) { # ---------------------------------------------------------------# # function to call plot the clusters # input - x,y : coor. of MDS # distM: distance Matrix for obs. # mst : mst for obs. # node: node for mst # mdist: distance for mst # clsMat: cluster Matrix # labName : label of obs. # name : type of cluster # output - None # ---------------------------------------------------------------# n <- length(x) - 1 cat("\n\n No. of Clst(2-",n,")(To Stop, only RETURN): ", sep="") numClst <- readline() if(numClst == "") break numClst <- as.integer(numClst) numC <- numClst - 1 t.name <- paste(name,"-link", sep="") clstSubPlotting(x,y,distM,mst, node, mdist, clsMat,numC, labName, name, t.name) clstPlotting(x,y, distM,mst, node, mdist, clsMat, labName, name) } clstSubPlotting <- function(x,y,distM,mst, node, mdist, clsMat, numC, labName, name, t.name) { # ---------------------------------------------------------------# # function to plot the clusters discriminated by MST # input - x,y : coor. of MDS # distM: distance Matrix for obs. # mst : mst for obs. # node: node for mst # mdist: distance for mst # clsMat: cluster Matrix # numC : number of cluster # labName : label of obs. # name : type of cluster # t.name : title name # output - None # ---------------------------------------------------------------# clsInfo <- clsMat[,numC] n <- length(mst) + 1 if( name == "Single") { obs <- c(2:n) eq.obs <- obs[clsInfo[obs] == clsInfo[mst]] m1.obs <- eq.obs - 1 eq.mst <- mst[m1.obs] neq.obs <- obs[-m1.obs] m2.obs <- neq.obs - 1 neq.mst <- mst[m2.obs] } else # "Complete" or "Average , etc { OtherInfo <- OtherClusters(distM, numC, clsInfo, mst, node, mdist) eq.obs <- OtherInfo$eq.obs eq.mst <- OtherInfo$eq.mst neq.obs <- OtherInfo$neq.obs neq.mst <- OtherInfo$neq.mst } # display information of group and indicator obs <- c(1:n) cat("\n Cluster for ", t.name) for(i in 1:(numC+1) ) cat("\n (G=",i,")", labName[obs[clsInfo==i]]) # ----------------------------------------------- rx <- range(x) ry <- range(y) par(col=5, pty="s") plot(x,y, type="n", xlim=rx, ylim=ry, xlab="", ylab="") abline(h=0,v=0, lty=2) par(col=5) text(x,y,labName ) # Color UNIX 1:yellow,2:cyan,3:magenta,4:green,5:blue,6:red # Window 1:black, 2:red, 3:green,4:blue, 5:yellow, 6:magenta par( lty=1, col= 6 ) segments( x[eq.obs], y[eq.obs], x[eq.mst], y[eq.mst]) par( lty=2, col= 5 ) segments( x[neq.obs], y[neq.obs], x[neq.mst], y[neq.mst]) par( col= 5 ) numClst <- numC + 1 numClst <- as.character(numClst) sub.name <- paste("(no.of Cluster: ", numClst," )", sep="" ) title(main=t.name, sub=sub.name) par( lty=1 ) } OtherClusters <- function(distM,numC, clsInfo, mst, node, mdist) { #-------------------------------------------------------# # function to calculate MST to disciminate clusters for # Complete, Average, Centroid, Median and Ward analyses. # input - distM: distance Matrix for obs. # numC : number of cluster # clsInfo: information of cluster # mst : mst for obs. # node: node for mst # mdist: distance for mst # output - eqobs,eqmst : mst connecting the inner point in clsts. # neqobs,neqmst: mst connecting the nearest clusters # ---------------------------------------------------------------# n <- length(mst) + 1 obs <- c(2:n) eq.obs <- obs[clsInfo[obs] == clsInfo[mst]] m1.obs <- eq.obs - 1 eq.mst <- mst[m1.obs] neq.obs <- obs[-m1.obs] m2.obs <- neq.obs - 1 neq.mst <- mst[m2.obs] no.d <- length(neq.obs) if(no.d > numC) { ModyInfo <- modifyMSTforOthers(distM,numC,clsInfo,eq.obs,neq.obs, mst,node,mdist) eq.obs <- ModyInfo$eq.obs eq.mst <- ModyInfo$eq.mst neq.obs <- ModyInfo$neq.obs neq.mst <- ModyInfo$neq.mst } result <- list(eq.obs= eq.obs, eq.mst=eq.mst,neq.obs=neq.obs, neq.mst=neq.mst) result } modifyMSTforOthers <- function(distM,numC,clsInfo,eq.obs,neq.obs, mst,node,mdist) { #---------------------------------------------------------------# # function to calculate modified MST to disciminate clusters # in case of number of discriminating MST greater than no. of clst's # input - distM: distance Matrix for obs. # numC : number of cluster # clsInfo: information of cluster # eq.obs : mst connecting inner point in clst # neq.obs: mst connecting between clusters # mst : mst for obs. # node: node for mst # mdist: distance for mst # output - eq.obs,eq.mst : mst connecting the inner point in clst's. # neq.obs,neq.mst: mst connecting the nearest clut's # ---------------------------------------------------------------# # Define True Difference Point and False Difference Point node <- node[neq.obs] m2.obs <- neq.obs-1 neq.mst <- mst[m2.obs] distance <- mdist[m2.obs] cls1 <- clsInfo[neq.obs] cls2 <- clsInfo[neq.mst] obs.no <- neq.obs n1 <- length(cls1) obs1 <- c(1:n1) nDiff <- (numC+1)*(numC+2)/2 rmobs <- array(0, nDiff) clsM <- matrix(0, ncol=6, nrow=n1, byrow=T) clsM[,1] <- cls1 clsM[,2] <- cls2 clsM[,3] <- distance clsM[,4] <- node clsM[,5] <- obs.no clsM[,6] <- obs1 for(i in 1:n1) { a1 <- min(clsM[i,1], clsM[i,2]) a2 <- max(clsM[i,1], clsM[i,2]) clsM[i,1] <- a1 clsM[i,2] <- a2 } clsM1 <- clsM[order(clsM[,1],clsM[,2]),1:6] # sorting of matrix obs1.no <- clsM1[,5] # Search for observations with minimum distance between groups in MST oldm1 <- clsM1[1,1] oldm2 <- clsM1[1,2] olddist <- clsM1[1,3] oldnode <- clsM1[1,4] oldobs <- 1 k <- 0 for(i in 2:n1) { newm1 <- clsM1[i,1] newm2 <- clsM1[i,2] newdist <- clsM1[i,3] newnode <- clsM1[i,4] if(oldm1 == newm1 && oldm2 == newm2) { if( olddist > newdist ) { olddist <- newdist oldobs <- i } else if(olddist == newdist ) { if( oldnode < newnode ) oldobs <- i } } else { k <- k+1 rmobs[k] <- oldobs oldm1 <- newm1 oldm2 <- newm2 olddist <- newdist oldnode <- newnode oldobs <- i } } k <- k+1 rmobs[k] <- oldobs rmobs <- rmobs[rmobs > 0] neq.obs <- obs1.no[rmobs] neq.mst <- mst[neq.obs-1] #------------------------------------------------ # Find MST for the same clusters #------------------------------------------------ obs <- c(1:length(clsInfo)) eq.obs <- 0 eq.mst <- 0 k <- 0 for(i in 1:(numC+1)) { sameobs.cls <- obs[i == clsInfo] if( length(sameobs.cls) > 1 ) { dM <- distM[sameobs.cls, sameobs.cls] DistanceArray <- MDistanceVectorFtn(dM) mstData <- MstTree(ncol(dM), DistanceArray) samemst <- mstData$mst samemst.no <- sameobs.cls[samemst] sameobs.cls <- sameobs.cls[-1] k <- k + 1 eq.obs <- c(eq.obs,sameobs.cls) eq.mst <- c(eq.mst,samemst.no) if(k == 1) { eq.obs <- eq.obs[-1] eq.mst <- eq.mst[-1] } } } result <- list(eq.obs=eq.obs,eq.mst=eq.mst, neq.obs=neq.obs,neq.mst=neq.mst) result } standardFtn <- function(x) { #-------------------------------------------------# # function of standarzing of raw data #-------------------------------------------------# n <- nrow(x) j <- c(rep(1,n)) j <- matrix(j,ncol=1, byrow=T) meanX <- t(x) %*% j / n Xd <- x - j %*% t(meanX) Cov <- t(Xd) %*% Xd / (n-1) Var <- diag(Cov) SD <- sqrt(Var) SD <- diag(SD) SD <- solve(SD) standX <- Xd %*% SD return(standX) } standard01Ftn <- function(x) { #------------------------------------------# # function of standarzing raw data to 0-1 #------------------------------------------# minX <- apply(x, 2, min) maxX <- apply(x, 2, max) nV <- ncol(x) bX <- maxX - minX standX <- x for(i in 1:nV){ standX[,i] <- ( x[,i] - minX[i] ) / bX[i] } standX } MDistance <- function(n,D) { #------------------------------------------------------------# # function to make full distance Matrix from distance vector #------------------------------------------------------------# MD <- matrix(0, nrow=n, ncol=n) for(j in 1:(n-1) ) for(k in (j+1):n) { kk <- n*(j-1) - j*(j-1)/2 + k-j MD[j,k] <- MD[k,j] <- D[kk] } return(MD) } #-------------------------------------------------------------# # Functions to decide clusters for Hierarchical Methods # # - Single, Complete, Average, Centroid, Median,Ward - # # Names of functions : ClstOrder, FindMinObsInc, RearrangeDist# # For this algorithm, see "Principles of Mutivariate Analysis"# # by W.J.Krzannowski,1988 Oxford Science Publications # #-------------------------------------------------------------# ClstOrder <- function(n,MD, nlabel, number, method) { TwoObs <- FindMinObsInc( n, MD) fobs <- TwoObs$fobs sobs <- TwoObs$sobs minimum <- TwoObs$minimum seqOrder <- c( nlabel[fobs], nlabel[sobs],minimum ) MDout <- RearrangeDist(n,MD,fobs,sobs,number,method) MD <- MDout$MD nlabel <- nlabel[-sobs] number[fobs] <- number[fobs] + number[sobs] number <- number[-sobs] result <- list(distance=MD, nlabel=nlabel, number=number, seqOrder=seqOrder) result } FindMinObsInc <- function( n, MD ) { minimum <- max(MD) + 10 for( i in 1:(n-1) ) for( j in (i+1):n ) { kk <- n*(i-1) - i*(i-1)/2 + j-i length <- MD[kk] if( length < minimum ) { minimum <- length fobs <- i sobs <- j } } TwoObs <- list(fobs=fobs,sobs=sobs,minimum=minimum) TwoObs } RearrangeDist <- function(n,MD,fobs,sobs,number, method) { ni <- number[fobs] nj <- number[sobs] obs <- c(1:n) robs <- obs[-c(fobs,sobs)] for(i in robs ) { if( i > fobs ) { i1 <- fobs f1 <- i } else { i1 <- i f1 <- fobs } kk1 <- n*(i1-1) - i1*(i1-1)/2 + f1-i1 if( i > sobs ) { i2 <- sobs s1 <- i } else { i2 <- i s1 <- sobs } kk2 <- n*(i2-1) - i2*(i2-1)/2 + s1-i2 kk3 <- n*(fobs-1) - fobs*(fobs-1)/2 + sobs-fobs nk <- number[i] switch(method, # Single linkage imsi.d <- 1/2*( MD[kk1] + MD[kk2] - abs(MD[kk1] - MD[kk2]) ), # Complete linkage imsi.d <- 1/2*( MD[kk1] + MD[kk2] + abs(MD[kk1] - MD[kk2]) ), # Average linkage imsi.d <- ni/(ni+nj) * MD[kk1] + nj/(ni+nj) * MD[kk2], # Centroid linkage imsi.d <- ni/(ni+nj) * MD[kk1] + nj/(ni+nj) * MD[kk2] - ni*nj/(ni+nj)^2 * MD[kk3], # Median linkage imsi.d <- 1/2* ( MD[kk1] + MD[kk2] ) - 1/4*MD[kk3] , # Ward's Method imsi.d <- ( (ni+nk)*MD[kk1] + (nj+nk)*MD[kk2] - nk*MD[kk3] ) / (ni+nj+nk) ) MD[kk1] <- imsi.d MD[kk2] <- imsi.d } morder <- c(1:n) morder <- morder[-sobs] delorder <- array(0*(n-1) ) k <- 0 for( i in morder) { k <- k+1 if(i < sobs) kk <- n*(i-1) - i*(i-1)/2 + sobs-i else if (i > sobs) kk <- n*(sobs-1) - sobs*(sobs-1)/2 + i- sobs delorder[k] <- kk } MD <- MD[-delorder] MD <- list(MD=MD, n=n-1) MD } DecideClst <- function(n,MB) { #-----------------------------------------------------------------# # function to decide cluster information from the result of S # input - n : number of obs. # MB: aggromoration order of the result # output - cluster matrix #-----------------------------------------------------------------# k <- n-2 B1 <- MB[,1] B2 <- MB[,2] MH <- matrix(0, nrow=n, ncol=k) MG <- array(0, n) MV <- array(0, n) value <- 0 for(i in 1:k) { obs1 <- B1[i] obs2 <- B2[i] if (MG[obs1] == 0 && MG[obs2] == 0) { value <- value + 1 MG[obs1] <- 1 MG[obs2] <- 1 MV[obs1] <- value MV[obs2] <- value } else if (MG[obs1] == 0 && MG[obs2] != 0) { MG[obs1] <- 1 MV[obs1] <- MV[obs2] } else if (MG[obs1] != 0 && MG[obs2] == 0) { MG[obs2] <- 1 MV[obs2] <- MV[obs1] } else if (MG[obs1] == 1 && MG[obs2] == 1) { minV <- min(MV[obs1],MV[obs2]) maxV <- max(MV[obs1],MV[obs2]) value <- value - 1 for(i in 1:n) { if(MV[i] == maxV) MV[i] <- minV else if(MV[i] > maxV) MV[i] <- MV[i] - 1 } } # assign cluster number to the observation not assigned MV1 <- MV n.cls <- max(MV1) for(j in 1:n) if( MG[j] == 0 ) { n.cls <- n.cls + 1 MV1[j] <- n.cls } MH[,n-1-i] <- MV1 } return(MH) } readData <- function() { #---------------------------------------------# # function to read the data file #---------------------------------------------# cat("\n ---- TYPE the DATA File Name : ") name <- readline() data.m <- scan(name) cat("\n -------------------------------") cat("\n (1) Raw Data ") cat("\n (2) Distance Matrix ") cat("\n -------------------------------") cat("\n Type the number : ") data.type <- readline() data.type <- as.integer(data.type) if( data.type == 1 ) { cat("\n ---- Number of columns : ") no.col <- readline() no.col <- as.integer(no.col) data.m <- matrix(data.m,ncol=no.col,byrow=T) n <- nrow(data.m) cat("\n : Standardize the Variables(Y/n)? : ") standard <- readline() if (standard != "n" ) { cat("\n -----------------------------------") cat("\n < Standard Type > ") cat("\n 1. Z-score 2. 0-1 transform ") cat("\n -----------------------------------") cat("\n Select (Default:1) : ") s.type <- readline() if( s.type == "2" ) data.m <- standard01Ftn(data.m) else data.m <- standardFtn(data.m) } cat("\n -------------------------------------------") cat("\n 1. Euclidean ") cat("\n 2. Squared Euclidean ") cat("\n 3. Maximum : maximum difference ") cat("\n 4. Manhattan : sum of absolute difference") cat("\n 5. binary : proportion of non-zeros ") cat("\n -------------------------------------------") cat("\n Select Distance Measure(Default:1) : ") dist.type <- readline() if(dist.type == "1") DistanceArray <- dist( data.m, metric="euclidean") else if(dist.type == "2") { DistanceArray <- dist( data.m, metric="euclidean") DistanceArray <- DistanceArray * DistanceArray } else if(dist.type == "3") DistanceArray <- dist( data.m, metric="maximum") else if(dist.type == "4") DistanceArray <- dist( data.m, metric="manhattan") else if(dist.type == "5") DistanceArray <- dist( data.m, metric="binary") else DistanceArray <- dist( data.m, metric="euclidean") } else if (data.type == 2 ) { cat("\n -------------------------------") cat("\n (1) Upper Triangular Matrix ") cat("\n (2) Lower Triangular Matrix ") cat("\n (3) Full Matrix ") cat("\n -------------------------------") cat("\n Type the number(Default=1) : ") upperValue <- readline() cat("\n --- Number of rows : ") n <- readline() n <- as.integer(n) if( upperValue == "2") { DistanceArray <- array(0, n*(n-1)/2 ) for(i in 1:(n-1) ) for(j in (i+1):n ) { kk1 <- (j-1)*(j-2)/2 + i kk2 <- n*(i-1)- i*(i-1)/2 + j-i DistanceArray[kk2] <- data.m[kk1] } } else if( upperValue == "3") DistanceArray <- MDistanceVectorFtn(data.m) else DistanceArray <- data.m } cat("\n I.D file (If not want, only RETURN) : ") lab.file <- readline() if( lab.file != "") # This is for Labelling in MST processing labName <- scan(lab.file, what="") else labName <- seq(n) result <- list(n=n,DistanceArray=DistanceArray, labName = labName) result } BeginMdsPlot <- function(distM, mst,labName) { #----------------------------------------------------------# # function to plot the MDS #----------------------------------------------------------# # cat("\n === Begin MDS processing ===\n") mdsResult <- mdsCalculate(distM) x <- mdsResult$x y <- mdsResult$y mdsModify <- mdsPlot2Dim(x,y,mst,labName) x <- mdsModify$x y <- mdsModify$y result <- list(x=x,y=y) result } MDistanceVectorFtn <- function(DistanceMatrix) { #---------------------------------------------------# # function of making distance vector(upper) from # full distance matrix #---------------------------------------------------# n <- ncol(DistanceMatrix) mv <- array(0, n*(n-1)/2 ) for(i in seq(n-1)) for(j in seq(from = i+1, to = n)) { arr.n <- n*(i-1) - i*(i-1)/2 + j-i mv[arr.n] <- DistanceMatrix[i,j] } return(mv) } RemoveObsPlot <- function(xF, yF, Distance, mstF, nodeF, mdistF, clsMatF, labN, name) { #------------------------------------------------------# # function for selecting removing obs. # -----------------------------------------------------# cat("\n Press the obs's with Left Button.") cat("\n After finishing, press Right Button ") cat("\n To stop, only press Right Button \n ") tname <- "MDS with MST" SelObs <- SelectObsPlot(xF,yF,mstF,labN, tname) lenR <- SelObs$lenR if(lenR == 0) break No.R <- SelObs$NoR NR <- paste(No.R, collapse=",") cat("\n Selected Obs's: ", NR) cat("\n Wait ! \n") DistanceMatrix <- Distance[-No.R, -No.R] labName <- labN[-No.R] x <- xF[-No.R] y <- yF[-No.R] n <- ncol(DistanceMatrix) DistanceArray <- MDistanceVectorFtn(DistanceMatrix) mstResult <- MstTree(n,DistanceArray) mst <- mstResult$mst mdist <- mstResult$dist node <- mstResult$node if(name == "Single") selMenu <- 1 else if(name == "Complete") selMenu <- 2 else if(name == "Average") selMenu <- 3 else if(name == "Centroid") selMenu <- 4 else if(name == "Median") selMenu <- 5 else if(name == "Ward") selMenu <- 6 clsMat <- DecideClusterCall(n, DistanceArray, selMenu) # ---- To see the effect of removing observations ---- # ---- simultaneously with or without removing observations ------- par(mfrow=c(1,2)) t.name <- paste(name,"-link", sep="") tr.name <- paste(t.name, ": Removed obs.", NR, sep="") mdsPlotRem(xF,yF,mstF,labN, t.name) mdsPlotRem(x,y,mst,labName, tr.name) CompareclstPlotting(xF,yF,No.R, mst,node,mdist,Distance, clsMat,mstF, nodeF,mdistF,clsMatF,labN,name) cat("\n Remove another obs's ? (Y/n) : ") YN <- readline() if (YN != "n") RemoveObsPlot(xF,yF,Distance,mstF,nodeF,mdistF,clsMatF,labN,name) } CompareRemovePlot <- function(xF,yF,DistMF,labN,mstF,nodeF,mdistF, cM1F, cM2F,type1,type2) { #------------------------------------------------------# # function to show the plot of two types of clusters # in the case of removing obs. # -----------------------------------------------------# cat("\n Press the obs's with Left Button.") cat("\n After finishing, press Right Button ") cat("\n To stop, only press Right Button \n ") tname <- "MDS with MST" SelObs <- SelectObsPlot(xF,yF,mstF,labN, tname) lenR <- SelObs$lenR if(lenR == 0) break No.R <- SelObs$NoR NR <- paste(No.R, collapse=",") cat("\n Selected Obs's : ", NR) cat("\n Wait ! \n") DistM<- DistMF[-No.R, -No.R] labName <- labN[-No.R] n <- ncol(DistM) DistanceArray <- MDistanceVectorFtn(DistM) mstData <- MstTree(n, DistanceArray) mst <- mstData$mst mstdist <- mstData$dist node <- mstData$node clsMat1 <- DecideClusterCall(n, DistanceArray, type1) clsMat2 <- DecideClusterCall(n, DistanceArray, type2) # ---- To see the effect of removing observations ---- # ---- simultaneously with or without removing observations ------- name1 <- nameMethodCall(type1) name2 <- nameMethodCall(type2) # Plots using removed data CompareMethodPlottingRemoving(xF,yF,DistMF,labN,mstF,nodeF,mdistF, cM1F,cM2F,DistM,labName,mst,node,mstdist, clsMat1,clsMat2,No.R, name1,name2) cat("\n Remove another obs's ? (Y/n) : ") YN <- readline() if (YN != "n") CompareRemovePlot(xF,yF,DistMF,labN,mstF,nodeF,mdistF, cM1F, cM2F,type1,type2) } CompareTwoPlot <- function(x,y,distA,labName,mst,node,mdist) { cat("\n ------------------------------") cat("\n Select two methods to Compare ") cat("\n 1. Single 2. Complete ") cat("\n 3. Average 4. Centroid ") cat("\n 5. Median 6. Ward ") cat("\n -----------------------------") cat("\n First Method : ") i1 <- readline() i1 <- as.numeric(i1) cat(" Second Method : ") i2 <- readline() i2 <- as.numeric(i2) name1 <- nameMethodCall(i1) name2 <- nameMethodCall(i2) n <- length(x) cM1 <- DecideClusterCall(n, distA, i1) cM2 <- DecideClusterCall(n, distA, i2) distM <- MDistance(n, distA) CompareMethodPlotting(x,y,distM,labName,mst,node,mdist, cM1,cM2,0,name1,name2) cat("\n Remove observations (Y/n) : ") remove.yes <- readline() if(remove.yes != "n") CompareRemovePlot(x,y,distM,labName,mst,node,mdist, cM1, cM2,i1,i2) } SelectClsMenu <- function() { cat("\n -------------------------------") cat("\n 1. Single Cluster Analysis ") cat("\n 2. Complete Cluster Analysis ") cat("\n 3. Average Cluster Analysis ") cat("\n 4. Centroid Cluster Analysis ") cat("\n 5. Median Cluster Analysis ") cat("\n 6. Ward's Method ") cat("\n 7. Compare Two Methods ") cat("\n 9. Exit ") cat("\n -------------------------------") cat("\n Select : The number is ---- : ") i <- readline() if(i=="9") i <- 8 i <- as.numeric(i) return(i) } DecideClusterCall <- function(n, DistA, selMenu, labName) { # define cluster k <- 0 nlabel <- c(1:n) seqOrder <- matrix(0, nrow=n-1, ncol=3) number <- array(1,n) for(i in n:2) { clstA <- ClstOrder(i, DistA, nlabel, number, selMenu) k <- k+1 seqOrder[k,] <- clstA$seqOrder DistA <- clstA$distance nlabel <- clstA$nlabel number <- clstA$number } # Cluster agglomeration name <- nameMethodCall(selMenu) clstProc <- matrix(0, ncol=3, nrow=n-1, byrow=T) dimnames(clstProc) <- list(NULL, c("fobs.", "sobs.", "distance")) clstProc[,1] <- labName[seqOrder[,1]] clstProc[,2] <- labName[seqOrder[,2]] clstProc[,3] <- seqOrder[,3] cat("\n === ", name, " Cluster Process =====\n", sep="") print(clstProc) seqOrder <- seqOrder[,-3] clsM <- DecideClst(n,seqOrder) clsM } nameMethodCall <- function(k) { if(k == 1) name <- "Single" else if(k == 2) name <- "Complete" else if(k == 3) name <- "Average" else if(k == 4) name <- "Centroid" else if(k == 5) name <- "Median" else if(k == 6) name <- "Ward" return(name) } MstMdsBegin <- function() { #----------------------------------------------------------# # Begin Function #----------------------------------------------------------# data <- readData() n <- data$n DistanceArray <- data$DistanceArray labName <- data$labName ### === Minimum Spanning Tree === ### mstResult <- MstTree(n, DistanceArray) mst <- mstResult$mst mstdist <- mstResult$dist node <- mstResult$node cat("\n === Save MST Result(.mst) === ") cat("\n Type File name (if not, only RETURN) : ") mstName <- readline() if( mstName != "") { sink(mstName) print(mstResult) sink() } MstMdsBeginPlot(x,y,n,DistanceArray,mst,node,mstdist,labName) } MstMdsBeginPlot <- function(x,y,n,distA,mst,node,mstdist, labName,initial=1) { ### ==== Now Plot MDS superimposed by MST ==== ### motif() #win.graph() # Call Graphic Window for Plotting in Window 95 distM <- MDistance(n, distA) if(initial == 1) { mdsOut <- BeginMdsPlot( distM, mst, labName) x <- mdsOut$x y <- mdsOut$y } else { t.name <- "MDS with MST" mdsPlotRem(x,y,mst,labName, t.name) } selMenu <- SelectClsMenu() if(selMenu < 7) { clsM <- DecideClusterCall(n, distA, selMenu, labName) name <- nameMethodCall(selMenu) clstPlotting( x,y, distM,mst, node, mstdist, clsM, labName,name) cat("\n Remove observations (Y/n) : ") remove.yes <- readline() if(remove.yes != "n") RemoveObsPlot( x,y, distM,mst, node, mstdist, clsM, labName,name) } else if(selMenu == 7) CompareTwoPlot(x,y,distA,labName,mst,node,mstdist) else if(selMenu == 8) break MstMdsBeginPlot(x,y,n,distA,mst,node,mstdist,labName,initial=2) } MstMdsBegin()