options(max.print=2000000) rm(list=ls()) ################################################################# ## Install R packages ## ## A smart function ## ################################################################# check.packages <- function(pkg){ new.pkg <- pkg[!(pkg%in%installed.packages()[,"Package"])] if (length(new.pkg)) install.packages(new.pkg, dependencies = TRUE) sapply(pkg, require, character.only = TRUE) } packages <- c('lme4', 'reshape2' ) check.packages(packages) setwd("C:/Users/wdbeavis/Desktop/PBEA/QG/Module 10") #Next load in the data set from a spreadsheet (excel.csv) dslong<-read.csv("QG_Mod10_ALAs_ds.csv",header=T) summary(dslong) dslong$line<-factor(dslong$line) dslong$env<-factor(dslong$env) dslong$blk<-factor(dslong$blk) summary(dslong) ###################################################### ### Conduct Exploratory Data Analyses #### ### Some useful tools are histograms and boxplots #### ###################################################### hist(dslong$Pheno,col="beige", main="Frequency of Phenotypic Values", xlab="Phenotypic Values") boxplot(dslong$Pheno~dslong$env,main="Boxplot by Environment", xlab="Environment",ylab="Phenotypic Value") dsenvA<-subset(dslong,env=="A") dsenvB<-subset(dslong,env=="B") dsenvC<-subset(dslong,env=="C") dsenvD<-subset(dslong,env=="D") dsenvE<-subset(dslong,env=="E") dsenvF<-subset(dslong,env=="F") dsenvG<-subset(dslong,env=="G") dsenvH<-subset(dslong,env=="H") dsenvI<-subset(dslong,env=="I") dsenvJ<-subset(dslong,env=="J") lmer(data=dsenvA, Pheno ~ blk + (1|line) ) ###################################################### ### Conduct Data Analyses on data combined across #### ### environments to obtain an estimate of V(GxE) #### ###################################################### anova(lm(data=dslong,Pheno~env+line+line*env)) lmer(data=dslong, Pheno ~ env + (1|line)+ (1|env:line) ) ######################################################## ### Create wide, ie. messy data sets for use in ### ### Cluster Analyses. tell melt function of reshape2 ### #### ### that line, env and rep are identifiers ### ######################################################## ds2<-melt(dslong, id.vars = c("line","env","blk")) head(ds2) ######################################################## #reshape from long to wide for clustering lines ###### ######################################################## ds2wide<-dcast(ds2,line ~ env, mean) head(ds2wide) write.csv(ds2wide, file= "QG_Mod10_for_ALA10.3_lines_in_rows.csv") ######################################################## #cluster lines based on pheno ################ ######################################################## distance<-dist(ds2wide,method="euclidean") group<-hclust(distance,method="ward.D") plot(group,main="Cluster of lines with similar Yield responses across environments",xlab="line") ######################################################## #reshape from long to wide for clustering environments ######################################################## ds3wide<-dcast(ds2,env~line,mean) head(ds3wide) write.csv(ds3wide, file= "QG_Mod10_for_ALA10.3_envs_in_rows.csv") ######################################################## #cluster environments based on pheno ################### ######################################################## distance<-dist(ds3wide,method="euclidean") group<-hclust(distance,method="ward.D") plot(group,main="Cluster of envs with similar Yield responses across lines",xlab="env")