# Multitype NN test functions NNid <- function(xy, splancs=T) { # find id of nearest neighbor, given xy positions as a matrix # uses either of two functions, # depending on whether splancs or spatialstats is available # if group is non null, then finds the neighbor that is withing # the same group as the point. Typical use is restricting # neighbors to same side of the street. if (splancs) { nndistG(xy)$neighs } # find nn routine in splancs else { temp <- find.neighbor(xy,k=2); # find nn in spatialstats as.vector(temp[temp[,1] != temp[,2],2]); # remove self neighbors, return id of neighbor # N.B. assumes find.neighbor returns list in order of points 1, 2, } } # check is a debugging function check <- function(x, v, l1 ,l2) { if (v[l1,l2] > 0) { print("WARNING from routine ",x,": element",l1,l2,"already non-zero"); } v[l1,l2] <- x v } # ginv computes a generalized inverse of a matrix ginv <- function(m) { # compute a generalized inverse of matrix m temp <- eigen(m, symmetric=T); # calculate eigenvalues and eigenvectors va <- temp$values; # extract the e-vals and e-vects ve <- temp$vectors; va <- ifelse((abs(va)<1e-9), 0, 1/va); # invert e-vals, but leave 0's alone va2 <- 0*m; # matrix of zeros, same size as m diag(va2) <- va # stuff into a square matrix ve %*% va2 %*% t(ve); # and construct inverse } mNNinfo <- function(xy, label, nnid=NULL, splancs=T) { # data is xy coordinates in xy, labels in label # output is a list with ON = obs. contingency table, # EN = expected counts # VarN = variance-covariance matrix of counts if (is.null(nnid)) { nnid <- NNid(xy,splancs); # get the nearest neighbor id } # if not already provided n <- table(label) N <- sum(n); l <- names(n); # names of the types of points k <- length(l); R <- sum((1:N) == nnid[nnid]); # number of refl NN Q1 <- table(c(1:6,table(nnid))) Q <- 2*Q1[2] + 6*Q1[3] + 12*Q1[4] + 20*Q1[5] + 30*Q1[6] - 70; # get # shared neighbors, extra bit (1:6) is to ensure # that outer table() always generates a length 6 vector # correct for the extra by subtracting (2+6+12+20+30)=70 ON <- matrix(0,nrow=k, ncol=k); # loop over all pairs of types to get Obs count for (i in 1:k) { for (j in 1:k) { ON[i,j] <- sum((label==l[i]) & (label[nnid] ==l[j])); } } # use second function to calculate E N and Var N temp <- mNNinfo2(n, R, Q); # then return all the bits list(ON=ON, EN=temp$EN, VarN=temp$VarN, R=R, Q=Q); } mNNinfo2 <- function(n, R, Q) { # do the real work calculating moments of N # (expected values and V-C matrix) N <- sum(n); k <- length(n); l <- names(n); EN <- matrix(0,nrow=k, ncol=k); VN <- VarN <- matrix(0, nrow=k*k, ncol=k*k); # loop over all pairs of types to get Obs and expected counts for (i in 1:k) { for (j in 1:k) { EN[i,j] <- n[i] * (n[j] - (i==j))/(N-1); } } # loop over all combinations to get Variance-covariance matrix for (l1 in 1:(k*k)) { i <- 1 + (l1-1)%/%k; j <- 1+ (l1-1)%%k; for (l2 in l1:(k*k)) { i2 <- 1 + (l2-1)%/%k; j2 <- 1+(l2-1)%%k; # Variance elements if ((i==i2) & (j==j2)) { if (i == j) { # Var Naa p2 <- n[i]*(n[i]-1)/(N*(N-1)); p3 <- p2*(n[i]-2)/(N-2); p4 <- p3*(n[i]-3)/(N-3); VN <- check(1, VN,l1,l2); VarN[l1,l2] <- (N+R)*p2 + (2*N-2*R+Q)*p3 + (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i,j]; } else { p2 <- n[i]*n[j]/(N*(N-1)); p3 <- p2*(n[i]-1)/(N-2); p4 <- p3*(n[j]-1)/(N-3); VN <- check(2, VN,l1,l2); VarN[l1,l2] <- N*p2 + Q*p3 + (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i,j]; } } else if ((i == j) & (i == i2) & (j != j2)) { # Cov Naa, Nab p3 <- n[i]*(n[i]-1)*n[j2]/(N*(N-1)*(N-2)); p4 <- p3 * (n[i]-2)/(N-3); VN <- check(3, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- (N-R)*p3 + (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } else if ((i2 == j2) & (i == i2) & (j != j2)) { # Cov Nab, Naa p3 <- n[i2]*(n[i2]-1)*n[j]/(N*(N-1)*(N-2)); p4 <- p3 * (n[i]-2)/(N-3); VN <- check(3, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- (N-R)*p3 + (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } else if ((i2 == j2) & (j == j2) & (i != i2)) { # Cov Nab, Nbb p3 <- n[j]*(n[j]-1)*n[i]/(N*(N-1)*(N-2)); p4 <- p3 * (n[j]-2)/(N-3); VN <- check(4, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- (N-R+Q)*p3 + (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } else if ((i == j) & (i == j2) & (i != i2)) { # Cov Naa, Nba p3 <- n[i]*(n[i]-1)*n[i2]/(N*(N-1)*(N-2)); p4 <- p3 * (n[i]-2)/(N-3); VN <- check(14, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- (N-R+Q)*p3 + (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } else if ((i ==j) & (i2 == j2) & (i != i2)) { # Cov Naa, Nbb p4 <- n[i]*(n[i]-1)*n[i2]*(n[i2]-1)/(N*(N-1)*(N-2)*(N-3)); VN <- check(5, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } else if ((i == j) & (i2 != i) & (j2 != j) & (i2 != j2)) { # Cov Naa, Nbc p4 <- n[i]*(n[i]-1)*n[i2]*n[j2]/(N*(N-1)*(N-2)*(N-3)); VN <- check(6, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } else if ((i2 == j2) & (i2 != i) & (j2 != j) & (i != j)) { # Cov Nab, Ncc p4 <- n[i2]*(n[i2]-1)*n[i]*n[j]/(N*(N-1)*(N-2)*(N-3)); VN <- check(6, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } else if ((i == i2) & (i != j) & (i2 != j2) & (j != j2)) { # Cov Nab, Nac p4 <- n[i]*(n[i]-1)*n[j]*n[j2]/(N*(N-1)*(N-2)*(N-3)); VN <- check(7, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } else if ((i==j2) & (i2==j) & (i != j)) { # Cov Nab, Nba p2 <- n[i]*n[j]/(N*(N-1)); p3 <- p2 * (n[i]-1+n[j]-1)/(N-2); p4 <- p2 * (n[i]-1)*(n[j]-1)/((N-2)*(N-3)); VN <- check(8, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- R*p2 + (N-R)*p3 + (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } else if ((i != j) & (j == i2) & (i2 != j2) & (i != j2)) { # Cov Nab, Nbc p3 <- n[i]*n[j]*n[j2]/(N*(N-1)*(N-2)); p4 <- p3 * (n[j]-1)/(N-3); VN <- check(9, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- (N-R)*p3 + (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } else if ((i != j) & (j == j2) & (i2 != j2) & (i != i2)) { # Cov Nab, Ncb p3 <- n[i]*n[j]*n[i2]/(N*(N-1)*(N-2)); p4 <- p3 * (n[j]-1)/(N-3); VN <- check(10, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- Q*p3 + (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } else if ((i != j) & (i == j2) & (i2 != j2) & (j != i2)) { # Cov Nab, Nca p3 <- n[i]*n[j]*n[i2]/(N*(N-1)*(N-2)); p4 <- p3 * (n[i]-1)/(N-3); VN <- check(11, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- (N-R)*p3 + (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } else if ((i != j) & (i != i2) & (i != j2) & (j != i2) & (j != j2) & (i2 != j2) ) { # Cov Nab, Ncd p4 <- n[i]*n[j]*n[i2]*n[j2]/(N*(N-1)*(N-2)*(N-3)); VN <- check(12, VN,l1,l2); VarN[l1,l2] <- VarN[l2,l1] <- (N*(N-3)-Q+R)*p4 - EN[i,j]*EN[i2,j2]; } } } v <- as.vector(t(outer(l,l,paste,sep=''))); dimnames(VN) <- dimnames(VarN) <- list(v,v); list(EN=EN, VN = VN, VarN = VarN); } mNNtest <- function(info, obsN=NULL) { # use the output from mNNinfo to construct tests # if gave a specific value for obsN, use that, It should be a vector! # otherwise, use the value from info if (is.null(obsN)) obsN <- as.vector(t(info$ON)) expN <- as.vector(t(info$EN)); varN <- diag(info$VarN); Z <- (obsN-expN)/sqrt(varN) names(Z) <- dimnames(info$VarN)[[1]] k <- nrow(info$EN) delta <- as.matrix(obsN-expN); C <- t(delta) %*% ginv(info$VarN) %*% delta; pC <- 1-pchisq(C,k*(k-1)); Ci <- rep(0,k); for (i in 1:k) { i1 <- 1+(i-1)*k; i2 <- i1 + (k-2); Ci[i] <- t(delta[i1:i2,]) %*% solve(info$VarN[i1:i2,i1:i2]) %*% delta[i1:i2]; } pCi <- 1-pchisq(Ci,k-1); list(Z = Z, C = c(C,pC), Ci = cbind(Ci,pCi)); } alt.test <- function(info,rand, alpha=0.05) { # compute alternate tests of each cell count EN <- as.vector(info$EN) VN <- diag(info$VarN) # Normal approx n.crit <- EN + qnorm(alpha/2)*sqrt(VN); all.p <- apply(rand < n.crit,1,mean); n.crit <- EN + qnorm(1-alpha/2)*sqrt(VN); all.p <- cbind(Norm.l = all.p, Norm.u = apply(rand > n.crit,1,mean)); # Chi-square approx from Cliff and Ord, p 57 df <- 2*EN^2/VN; # lower tail chi.crit <- VN*qchisq(alpha/2,df)/(2*EN); all.p <- cbind(all.p,Chi.l =apply(rand chi.crit,1,mean)); # and Poisson approx p.crit <- qpois(alpha/2, EN); all.p <- cbind(all.p,Pois.l =apply(rand p.crit,1,mean)); all.p } all.test <- function(alpha = 0.05) { all.p <- matrix(0,nrow=11,ncol=6); temp <- mNNinfo2(c(M=104,F=133,J=20), 164, 162); temp2 <- alt.test(temp, rand20,alpha=alpha) all.p[1,] <- temp2[9,]; temp <- mNNinfo2(c(M=104,F=129,J=24), 164, 162); temp2 <- alt.test(temp, rand24,alpha=alpha) all.p[2,] <- temp2[9,]; temp <- mNNinfo2(c(M=104,F=125,J=28), 164, 162); temp2 <- alt.test(temp, rand28,alpha=alpha) all.p[3,] <- temp2[9,]; temp <- mNNinfo2(c(M=104,F=121,J=32), 164, 162); all.p[4,] <- alt.test(temp, rand32,alpha=alpha)[9,] temp <- mNNinfo2(c(M=104,F=117,J=36), 164, 162); all.p[5,] <- alt.test(temp, rand36,alpha=alpha)[9,] temp <- mNNinfo2(c(M=104,F=113,J=40), 164, 162); all.p[6,] <- alt.test(temp, rand40,alpha=alpha)[9,] temp <- mNNinfo2(c(M=104,F=108,J=45), 164, 162); all.p[7,] <- alt.test(temp, rand45,alpha=alpha)[9,] temp <- mNNinfo2(c(M=104,F=103,J=50), 164, 162); all.p[8,] <- alt.test(temp, rand50,alpha=alpha)[9,] temp <- mNNinfo2(c(M=104,F=93,J=60), 164, 162); all.p[9,] <- alt.test(temp, rand60,alpha=alpha)[9,] temp <- mNNinfo2(c(M=104,F=83,J=70), 164, 162); all.p[10,] <- alt.test(temp, rand70,alpha=alpha)[9,] temp <- mNNinfo2(c(M=104,F=73,J=80), 164, 162); temp2 <- alt.test(temp, rand80,alpha=alpha); all.p[11,] <- temp2[9,]; all.p <- cbind(N=c(20,24,28,32,36,40,45,50,60,70,80), all.p); dimnames(all.p) <- list(rep(' ',11), c("N","Norm.l","Norm.u","Chi.l","Chi.u","Pois.l","Pois.u")); all.p }