# Example code to load a data set, then compute and compare distances.
#
# The code assumes the data files are saved in your working directory.
# You can set the working directory using the File/Change Dir menu,
# or setwd('XXX') where XXX is the full path using /
# e.g. c:/users/netid/project

library(vegan)

# ---- Read the data ----

# read a .csv file
# as.is = TRUE stops automatic conversion of character strings to factors
lipo <- read.csv('lipo.csv', as.is = TRUE)

# or, read a data matrix with spaces
park <- read.table('parkgrass.txt', header = TRUE, as.is = TRUE)

# descriptive info about the data matrix
dim(park)
names(park)
str(park)

# ---- Build the abundance matrix ----

# convert the data frame to a matrix with ONLY the abundance info
# usually have to remove one or more columns with labels
# there are two of these in both lipo and parkgrass
# often there is only one, in which case you would use only [, -1]
park.data <- park[, c(-1, -2)]
park.m <- as.matrix(park.data)

# output and plots are more informative if the data matrix has
# appropriate row (species / compound) and column (site / sample) names
# column names are already set using the names from the data frame
# see this:
dimnames(park.m)

# add row names to the data matrix, using the appropriate column from
# the original data frame
dimnames(park.m)[[1]] <- park$sppname

# check, if you want to make sure
dimnames(park.m)

# if the file is 'backwards', i.e. rows are species, transpose after
# creating the matrix
park.m <- t(park.m)

# pretty up the site names by removing the leading X (presumably added
# when the file was read because the column names are not valid R names).
# Probably not necessary for your data matrix.
# The pattern is anchored, so a name without a leading X is left intact
# instead of losing its first character.
sites <- dimnames(park.m)[[1]]
sites <- sub('^X', '', sites)
dimnames(park.m)[[1]] <- sites

# ---- Now we can have fun: calculate distance matrices ----

# standardize to site total
# NOTE: this overwrites park.m, so from here on it holds proportions
park.m <- decostand(park.m, 'total')

# then compute Bray-Curtis distance (default for vegdist)
park.bray <- vegdist(park.m)

# can compute many other distances
park.eucl <- vegdist(park.m, 'euclidean')  # Euclidean
park.mh <- vegdist(park.m, 'horn')         # Morisita-Horn

# scatter plot matrix to compare distances
# combine columns (cbind), then plot matrix (pairs)
pairs(cbind(park.bray, park.eucl, park.mh))

# can convert the distance object to a matrix, if need to
temp <- as.matrix(park.bray)

# which makes it easy to look at parts of the distance array
temp[1:5, 1:5]

# ---- Identify individual pairs of sites ----

# click on points in the plot; identify() returns their obs. numbers
# (interactive: needs an on-screen graphics device)
plot(park.bray, park.mh,
     xlab = 'Bray-Curtis distance',
     ylab = 'Morisita-Horn distance')
temp <- identify(park.bray, park.mh)

# which.site converts the obs. number to the pair of site names
# it is defined in whichsite.r; load it if it is not already available
# (assumes whichsite.r is in the working directory, like the data files)
if (!exists('which.site', mode = 'function')) {
  source('whichsite.r')
}
which.site(temp, park.bray)