#----------------------------------------------------------- # This program is plotting dendrogram of cluster analysis. # Considered cluster analyses are single, complete, average. # We also considered the dendrogram with some observations. #----------------------------------------------------------- standardFtn <- function(x) { #-------------------------------------------------# # function of standarzing of raw data #-------------------------------------------------# n <- nrow(x) j <- c(rep(1,n)) j <- matrix(j,ncol=1, byrow=T) meanX <- t(x) %*% j / n Xd <- x - j %*% t(meanX) Cov <- t(Xd) %*% Xd / (n-1) Var <- diag(Cov) SD <- sqrt(Var) SD <- diag(SD) SD <- solve(SD) standX <- Xd %*% SD return(standX) } MDistance <- function(n,D) { #-------------------------------------------------------# # function to make distance Matrix from distance vector #-------------------------------------------------------# MD <- matrix(0, nrow=n, ncol=n) for(j in 1:(n-1) ) for(k in (j+1):n) { kk <- n*(j-1) - j*(j-1)/2 + k-j MD[j,k] <- MD[k,j] <- D[kk] } return(MD) } MDistanceVectorFtn <- function(DistanceMatrix) { #---------------------------------------------------# # function of making distance vector(upper) from # full distance matrix #---------------------------------------------------# n <- ncol(DistanceMatrix) mv <- array(0, n*(n-1)/2 ) for(i in seq(n-1)) for(j in seq(from = i+1, to = n)) { arr.n <- n*(i-1) - i*(i-1)/2 + j-i mv[arr.n] <- DistanceMatrix[i,j] } return(mv) } readData <- function() { #---------------------------------------------# # function to read the data file #---------------------------------------------# cat("\n ---- TYPE the DATA name : ") name <- readline() data.m <- scan(name) cat("\n -------------------------------") cat("\n (1) Raw Data ") cat("\n (2) Distance Matrix ") cat("\n -------------------------------") cat("\n Type the number : ") data.type <- readline() data.type <- as.integer(data.type) if( data.type == 1 ) { cat("\n ---- Number of columns : ") no.col <- readline() no.col <- as.integer(no.col) data.m <- matrix(data.m,ncol=no.col,byrow=T) n <- nrow(data.m) cat("\n : Standardize the Variables(Y/n)? : ") standard <- readline() if (standard != "n" ) data.m <- standardFtn(data.m) cat("\n -------------------------------------------") cat("\n 1. Euclidean ") cat("\n 2. Squared Euclidean ") cat("\n 3. Maximum : maximum difference ") cat("\n 4. Manhattan : sum of absolute difference") cat("\n 5. binary : proportion of non-zeros ") cat("\n -------------------------------------------") cat("\n Select Distance Measure(Default:1) : ") dist.type <- readline() if(dist.type == "1") DistanceArray <- dist( data.m, metric="euclidean") else if(dist.type == "2") { DistanceArray <- dist( data.m, metric="euclidean") DistanceArray <- DistanceArray * DistanceArray } else if(dist.type == "3") DistanceArray <- dist( data.m, metric="maximum") else if(dist.type == "4") DistanceArray <- dist( data.m, metric="manhattan") else if(dist.type == "5") DistanceArray <- dist( data.m, metric="binary") else DistanceArray <- dist( data.m, metric="euclidean") } else if (data.type == 2 ) { cat("\n -------------------------------") cat("\n (1) Upper Triangular Matrix ") cat("\n (2) Lower Triangular Matrix ") cat("\n (3) Full Matrix ") cat("\n -------------------------------") cat("\n Type the number(Default=1) : ") upperValue <- readline() cat("\n --- Number of rows : ") n <- readline() n <- as.integer(n) if( upperValue == "2") { DistanceArray <- array(0, n*(n-1)/2 ) for(i in 1:(n-1) ) for(j in (i+1):n ) { kk1 <- (j-1)*(j-2)/2 + i kk2 <- n*(i-1)- i*(i-1)/2 + j-i DistanceArray[kk2] <- data.m[kk1] } } else if( upperValue == "3") DistanceArray <- MDistanceVectorFtn(data.m) else DistanceArray <- data.m } cat("\n I.D file (If not want, only RETURN) : ") lab.file <- readline() if( lab.file != "") # This is for Labelling in MST processing labName <- scan(lab.file, what="") else { labName <- seq(n) labName <- as.character(labName) } result <- list(n=n,DistanceArray=DistanceArray, labName = labName) result } DenClsMenu <- function() { cat("\n -------------------------------") cat("\n Dendrogram for ") cat("\n 1. Single Cluster Analysis ") cat("\n 2. Complete Cluster Analysis ") cat("\n 3. Average Cluster Analysis ") cat("\n 9. Exit ") cat("\n -------------------------------") cat("\n Select : The number is ---- : ") i <- readline() if(i=="9") i <- 4 i <- as.numeric(i) return(i) } RemoveDenPlot <- function( n, distM, labName, DenName, DenSelect) { #------------------------------------------------------# # function for displaying dendrogram without obs. # -----------------------------------------------------# cat("\n Removing Obs.No.(To stop, only RETURN) : ") NR <- readline() if(NR == "") break No.R <- as.integer(NR) RdistM <- distM[-No.R, -No.R] RlabName <- labName[-No.R] cat("\n Label =", RlabName, sep=" ") Rt.name <- paste(DenName, " without obs.",NR,sep="") if(DenSelect == 1) Rden.clust <- hclust(RdistM, method="connected") else if(DenSelect == 2) Rden.clust <- hclust(RdistM, method="compact") else if(DenSelect == 3) Rden.clust <- hclust(RdistM, method="average") #win.graph() motif() # Call Graphic Window for Unix par(pty="s") plclust(Rden.clust, labels=RlabName) title(Rt.name) } DenBegin <- function() { data <- readData() n <- data$n distA <- data$DistanceArray labName <- data$labName distM <- MDistance(n, distA) DendrogramPlot(n,distM,labName) } DendrogramPlot <- function(n,distM,labName) { #----------------------------------------------------------# # Begin Function #----------------------------------------------------------# DenSelect <- DenClsMenu() if(DenSelect == 1) { den.clust <- hclust(distM, method="connected") DenName <- "Dendrogram of Single-link" } else if(DenSelect == 2) { den.clust <- hclust(distM, method="compact") DenName <- "Dendrogram of Complete-link" } else if(DenSelect == 3) { den.clust <- hclust(distM, method="average") DenName <- "Dendrogram of Average-link" } else if(DenSelect == 4) break # win.graph() # Call Graphic Window for Window 95 motif() # Call Graphic Window for Unix par(pty="s") plclust(den.clust, labels=labName) title(DenName) # box() cat("\n Remove observations (Y/n) : ") remove.yes <- readline() if(remove.yes != "n") RemoveDenPlot( n, distM, labName, DenName, DenSelect) DendrogramPlot(n,distM,labName) } DenBegin() --------------7883A343D5E0ACBE36A22115--