# Examples with sampling distributions.
#
# Walks through the example from class, then uses the data for the entire
# population to draw repeated samples and look at the distribution of the
# sample mean for several sample sizes.
#
# Needs two files in the working directory:
#   100Areas.txt   - the population responses, read with scan()
#   Lab2Sampling.r - defines the function repeat.samples()

# ---- The example from class ----

# My example had the following units:
# NOTE(review): id 97 is listed twice (and its response 4 twice) - confirm
# that this is what was drawn in class and not a transcription slip.
ids <- c(66, 86, 87, 3, 34, 51, 15, 48, 97, 97)

# The responses for those ids were:
resp <- c(10, 5, 8, 1, 12, 16, 9, 6, 4, 4)

# The mean was:
ybar <- mean(resp)

# The standard deviation was:
s <- sd(resp)

# The standard error for Ybar was s / sqrt(n), with n the sample size:
se <- s / sqrt(length(resp))

# Let us compute a histogram for all your values.
# Fill in the sample means collected in class between the parentheses.
your.ybars <- c()

# hist() stops with an error when given an empty vector, so only draw the
# histogram once some values have been entered above.
if (length(your.ybars) > 0) {
  hist(your.ybars)
}
# Where is the population mean?

# ---- Sampling from the known population ----

# In this example, I know the data for the entire population,
# so I can actually go and repeat this exercise by drawing more samples.
# Let's read in the data first.
areas.pop <- scan("100Areas.txt")

# Compute the mean for the sample from class.
mean(areas.pop[ids])

# Draw another sample, size 20 this time.
# (This replaces the class responses stored in resp.)
resp <- sample(areas.pop, 20)

# Compute mean and std.err.
mean(resp)
sd(resp) / sqrt(length(resp))

# ---- Many samples: the sampling distribution of the mean ----

# Draw many samples and compute the average each time, then draw a
# histogram. We need to use a "function" for this. The function is written
# in a separate file which we will read into R by using the command
# "source". The file name is Lab2Sampling.r
source("Lab2Sampling.r")

# The name of the function we need to use is repeat.samples and its
# arguments are (in this order): "population" (all responses),
# "sample.size" (denoted by n in class) and "M" (the number of samples to
# be drawn from the population).
# To run this function you will need to specify three things: the
# population to sample from, the sample size and how many samples you
# would like to draw. For example, type:
repeat.samples(areas.pop, 20, 100)
# This reported 100 sample means of samples of size 20.

# To draw histograms of the result you will need to save these results,
mult.ybars <- repeat.samples(areas.pop, 20, 100)
# then draw a histogram.
hist(mult.ybars)

# Do this 1000 times,
mult.ybars <- repeat.samples(areas.pop, 20, 1000)
# then draw a histogram.
hist(mult.ybars)

# Increase the sample size to 30,
mult.ybars <- repeat.samples(areas.pop, 30, 1000)
# then draw a histogram.
hist(mult.ybars)

# Draw 10000 random samples of size 5.
mult.ybars.5 <- repeat.samples(areas.pop, 5, 10000)
hist(mult.ybars.5)

# Draw 10000 random samples of size 10.
mult.ybars.10 <- repeat.samples(areas.pop, 10, 10000)
hist(mult.ybars.10)

# Draw 10000 random samples of size 50.
mult.ybars.50 <- repeat.samples(areas.pop, 50, 10000)
hist(mult.ybars.50)

# Add a red line on the histogram indicating the true mean.
abline(v = mean(areas.pop), col = 2)

# ---- Four-panel comparison ----

# Draw a graph containing the following 4 figures: a histogram of the
# population, and 10000 sample means for sample size 5, 10 and 50,
# indicating the true average (mu) on all these graphs.
# The 2 x 2 layout stays in effect for later plots; use
# par(mfrow = c(1, 1)) to go back to one figure per page.
par(mfrow = c(2, 2))

hist(areas.pop)
abline(v = mean(areas.pop), col = 2)

hist(mult.ybars.5)
abline(v = mean(areas.pop), col = 2)

hist(mult.ybars.10)
abline(v = mean(areas.pop), col = 2)

hist(mult.ybars.50)
abline(v = mean(areas.pop), col = 2)