#############################################
####### TREE QUADRATS example program #######
#############################################

#Normally, at the start of an R program you have the various function(s) defined that are used by the program.
#This has to come before the MAIN PROGRAM (which is where your program really starts).

# cstest: chi-squared goodness-of-fit test that prints a step-by-step report.
#
# Arguments:
#   observed       numeric vector of observed counts.
#   expected       numeric vector of expected counts under the null hypothesis,
#                  paired element-by-element with 'observed'.
#   P              confidence level in percent (e.g. 95); alpha = (100-P)/100.
#   doftosubtract  how many degrees of freedom to subtract from the number of
#                  usable (observed, expected) pairs. Subtract 1 if the expected
#                  values came from a source independent of the observed values;
#                  subtract >1 if you have a contingency table with more than one
#                  column or the expected values are derived from e.g. a model fit.
#
# Only the first min(length(observed), length(expected)) pairs are considered,
# and any pair containing a non-finite value (NA, NaN, Inf) is dropped.
# Everything is reported with cat(); the function returns NULL invisibly.
cstest <- function(observed, expected, P, doftosubtract) {
  brief <- FALSE  # For a less detailed report of this test, change this to "brief <- TRUE"

  if (length(observed) != length(expected) || !is.finite(sum(observed)) || !is.finite(sum(expected))) {
    cat("Tidying up data.\n")
  }

  # Keep only the pairs in which both values are finite. seq_len() (rather than
  # 1:n) keeps this safe when either input is empty.
  idx <- seq_len(min(length(observed), length(expected)))
  keep <- is.finite(observed[idx]) & is.finite(expected[idx])
  obs <- observed[idx][keep]
  exd <- expected[idx][keep]

  nu <- length(obs) - doftosubtract  # nu is the number of degrees of freedom
  if (nu < 1) {
    cat("No degrees of freedom - can't do test.\n")
    return(invisible(NULL))
  }

  alpha <- (100 - P) / 100
  # qchisq() replaces the old method of looking up the threshold in a
  # statistical table.
  chisqthresh <- qchisq(1 - alpha, df = nu)
  if (!brief) {
    cat("Threshold is: chi^2_(", alpha, ",", nu, ") =", chisqthresh, "\n")
  }

  if (nu < 2) {
    cat("Applying Yates's correction.\n")
    chisqstat <- sum(((abs(obs - exd) - 0.5)^2) / exd)
  } else {
    chisqstat <- sum(((obs - exd)^2) / exd)
  }
  # A NaN statistic (e.g. 0 observed against 0 expected gives 0/0) cannot be
  # compared with the threshold, so report it instead of crashing further down.
  if (is.nan(chisqstat)) {
    cat("Test statistic is undefined (check for zero or negative expected values) - can't do test.\n")
    return(invisible(NULL))
  }
  if (!brief) {
    cat("Test statistic is: Chi^2 =", chisqstat, "\n")
  }

  # Upper-tail probability computed directly: 1 - pchisq(...) loses all
  # precision once the tail is smaller than about 1e-16.
  pvalue <- pchisq(chisqstat, df = nu, lower.tail = FALSE)

  pvaluetext1 <- paste("p=", pvalue)
  if (pvalue > 0.1) {
    pvaluetext1 <- "p>0.1"
  }
  if (pvalue < 0.001) {
    pvaluetext1 <- "p<0.001"
  }

  # Later (stricter) thresholds deliberately overwrite the earlier labels.
  pvaluetext2 <- "not significant"
  if (pvalue < 0.1) {
    pvaluetext2 <- "weakly significant"
  }
  if (pvalue < 0.05) {
    pvaluetext2 <- "significant"
  }
  if (pvalue < 0.01) {
    pvaluetext2 <- "highly significant"
  }
  if (pvalue < 0.001) {
    pvaluetext2 <- "very highly significant"
  }

  if (!brief) {
    cat("At the",P,"% significance level (ie. you're saying that you're only going to reject a null model if there's a <",100-P,"%\nchance that you'll be wrong due to the results being 'coincidence' or 'unusual' (which is always a possibility when\nmaking generalisations about a population from measurements of a sample)):\n")
  }

  if (chisqstat < chisqthresh) {  # equivalent to "if (pvalue>alpha) {"
    if (!brief) {
      cat("\tThe observed values **AGREE** with those predicted by the null hypothesis (i.e. this is not enough evidence to reject the\nnull hypothesis; the data fits the null hypothesis tolerably well; the deviation of observed values from the null hypothesis can be\nexplained by chance alone; observations are what was expected).\n")
    }
    cat("Observations are as expected, so null hypothesis accepted (chi-squared test, n=",length(obs),",",pvaluetext1,").\n")
  } else {
    if (!brief) {
      cat("\tThe observed values **ARE DIFFERENT** from those predicted by the null hypothesis (ie. the data doesn't support the\nnull hypothesis; the null hypothesis should be rejected; consider alternative hypotheses)\n")
      cat("\t(p value for this result is",pvalue,"- a",pvaluetext2,"rejection of the null hypothesis).\n")
    }
    if (mean(obs) < mean(exd)) {
      cat("Observations are generally lower than expected,")
    } else {
      cat("Observations are generally higher than expected,")
    }
    cat(" so null hypothesis rejected (chi-squared test, n=",length(obs),",",pvaluetext1,").\n")
  }

  invisible(NULL)
}

############################
####### MAIN PROGRAM #######
############################

# Read the quadrat counts from the file "quadratdata" (just to demonstrate how
# to do that). Some people use read.delim(...) or read.csv(...), which are
# basically the same as read.table(...,sep="\t") and read.table(...,sep=",").
# Sometimes R reads in a column of numbers but doesn't recognise the values and
# labels them as 'factors': if this happens to you, try the solution at
# https://stat.ethz.ch/pipermail/r-help/2000-February/010165.html.
filedata <- read.table("quadratdata", header = TRUE)
if (ncol(filedata) < 2) {
  stop("The file 'quadratdata' needs two columns: the categories and the observed counts.")
}

# Equivalent of categories=c("0","1","2","3","4","5","6","7","8+").
# These categories MUST be mutually exclusive AND exhaustive.
categories <- as.character(filedata[[1]])

# Equivalent of observed=c(6,23,25,43,59,54,27,13,10).
# These observed counts must be count data (ie. integers) for this test.
observed <- filedata[[2]]

if (length(categories) != length(observed)) {
  stop("There's something up with the categories and/or observed counts.")
}
# Fail early with a clear message: NA or non-numeric counts would otherwise
# surface as an obscure error in the comparisons below.
if (!is.numeric(observed) || length(observed) == 0 || any(!is.finite(observed))) {
  stop("The observed counts must all be finite numbers.")
}

# Number of trees that each category stands for when estimating the mean,
# assuming the mean of the 8+ category is 10. The weights are specific to the
# nine categories 0..7 and 8+, so refuse any other layout rather than letting R
# silently recycle the shorter vector.
categoryvalues <- c(0, 1, 2, 3, 4, 5, 6, 7, 10)
if (length(observed) != length(categoryvalues)) {
  stop("This program expects exactly ", length(categoryvalues),
       " categories (0 to 7 and 8+), but the data file has ", length(observed), ".")
}

# Validity rules of thumb for the chi-squared test: every count >1 and fewer
# than 20% of the counts <5.
checkcount <- sum(observed < 5)
if (min(observed) < 2 || checkcount > (length(observed) * 20 / 100)) {
  stop("This result is invalid because you've got too few counts.\nYou need a) all of them >1 and b) less than 20% of them <5 otherwise you can't use a chi-squared test.\nTry grouping your data in to larger categories.\n")
}

numquadrats <- sum(observed)
estofmean <- sum(observed * categoryvalues) / numquadrats

# I think the hardest bit is always working out the expected values:
# (A) for a uniform distribution of trees, all quadrats should contain
#     (estofmean) trees, so all the probability goes into that one category
#     (capped at the last category).
probsA <- numeric(length(observed))
modalindex <- min(trunc(estofmean), length(observed) - 1)
probsA[modalindex + 1] <- 1
# (B) for a random distribution, the numbers come from a Poisson distribution;
#     the last (open-ended) category takes whatever probability is left over.
poissonprobs <- dpois(0:(length(observed) - 2), lambda = estofmean)
probsB <- c(poissonprobs, 1 - sum(poissonprobs))
expectedA <- numquadrats * probsA
expectedB <- numquadrats * probsB

# Now some explanation to the user of what's going on:
cat("You are an ecologist and you have set out some randomly and independently-positioned quadrats, each 10m by 10m, in a\nforest. You have counted the number of trees in each quadrat and your results are in the 'Observed' column of this\ntable (no. of trees in the first column) and the bar plot:\n")
table <- data.frame(observed, expectedA, expectedB)
rownames(table) <- categories
colnames(table) <- c("Observed counts", "Expected (A)", "Expected (B)")
print(table)
bpmatrix <- t(as.matrix(table))
barplot(bpmatrix, beside = TRUE, xlab = "no. of trees", ylab = "counts", legend.text = colnames(table))
cat("Total number of quadrats:",numquadrats,"\nEstimated mean number of trees per quadrat (assuming mean of the 8+ category is 10):",estofmean,"\n")
cat("You want to use a chi-squared 'goodness-of-fit' test to assess two null models: a uniform distribution (A) and a\nrandom distribution (B) (your alternative model/theory/hypothesis might be, for example, a clumped distribution). I've\nput the expected values from each of these theories in the table and the bar plot too.\n")
# Pause until the user presses ENTER. Reading a character token (not the default
# number) means that typing any text before ENTER no longer aborts the script.
cat("Press a <ENTER> or <RETURN> to continue ")
tmp <- scan(what = "", n = 1, quiet = TRUE, quote = "")

# and, finally, the chi-squared tests themselves:
P <- 95  # using a confidence level of P% (only use 95% or 99% in publications)
cat("\nWith A as the null theory/model/hypothesis:\n")
# Need to subtract 1 degree of freedom anyway for chi squared, but an extra one
# because estofmean was used in deriving the expected values.
cstest(observed, expectedA, P, 2)
cat("Press a <ENTER> or <RETURN> to continue ")
tmp <- scan(what = "", n = 1, quiet = TRUE, quote = "")
cat("\nWith B as the null theory/model/hypothesis:\n")
cstest(observed, expectedB, P, 2)

#Final comment about the way I've done the test above:
#Most R-users would do a chi-squared test using an in-built R function like this one:
#   pvalue=chisq.test(x=observed,p=expected,rescale.p=TRUE)$p.value
#(read the ?chisq.test manual page for more about how to use this). I'm aware of this, but I generally don't do things that way because all the calculations are hidden from me. It's a shame that R is not written with a 'verbose' option that goes through the calculations step-by-step for you with explanations, but that's the way life is. Why do I need to know what's going on? So that I can a) explain how the test works to someone else and b) adapt it if I ever have to do a nonstandard analysis (for which you have to understand the standard ones in depth anyway).
