rm(list=ls())
options(max.print=2000000)

#make sure the working directory is correct
setwd("/Users/aamahama/Documents/PBEA-SIL_Instructor_Guide-Project/PBEA-SIL Project/Instructor Guides/Quantitative Genetics - Instructor Resources/Module 12 Multi Environment Trials Linear Mixed Models/ALAs")

check.packages <- function(pkg){
  new.pkg <- pkg[!(pkg%in%installed.packages()[,"Package"])]
  if (length(new.pkg))
    install.packages(new.pkg, dependencies = TRUE)
  sapply(pkg, require, character.only = TRUE)
}
packages <- c('lme4','lsmeans')
check.packages(packages)

ds1<-read.csv("MET ds5.csv",header=T)
summary(ds1)
ds1$line<-factor(ds1$line)
ds1$env<-factor(ds1$env)
ds1$rep<-factor(ds1$rep)
ds1$mscore<-as.numeric(ds1$mscore)
summary(ds1)

############################################################
# consider analysis of variance of a model in which lines
# are considered categorical variables, that is to say
# factors in a factorial treatement design
# linear model: y=mean+env+line +res: What about GxE?
# linear model: y=mean+env+line + env:line + res
# since the data are balanced the MoM can be used
# but since we are interested in predicted values of the 
# lines, we are going to treat them as random effects
# in a mixed linear model
############################################################
attach(ds1)
glm<-(lm(yld~env*line))
lsm<-lsmeans(glm,~line)
lsm
write.csv(lsm,file="QG_Mod12_ALA12.4_lsm.csv", row.names=FALSE)

mixmodel = lmer(yld ~ env+(1|line)+(1|line:env))
mixmodel
mme<-ranef(mixmodel)
mme
write.csv(mme,file="QG_Mod12_ALA12.4_blupv.csv", row.names=FALSE)

detach(ds1)
###############################################################
## edit QG_Mod12_ALA12.4_blupv.csv by relabeling grp as line ##
## and condval as blupval.  Also remove the first 500 rows  ###
## because they are estimates of GxE and our interest is in ###
## blup values for lines 
## Next merge the standard lsm data with the blupval data   ###
###############################################################

leastsqrm<-read.csv("QG_Mod12_ALA12.4_lsm.csv",header=T)
leastsqr<-leastsqrm[c(1,2)]
leastsqr$line<-as.character(leastsqr$line)
leastsqr$lsmean<-as.numeric(leastsqr$lsmean)
summary(leastsqr)

blupall<-read.csv("QG_Mod12_ALA12.4_blupv.csv",header=T)
blupall$line<-as.character(blupall$line)
blupall$blupval<-as.numeric(blupall$blupval)
summary(blupall)

ds2<-merge(leastsqr,blupall,by= c("line"),all=TRUE)

attach(ds2)
plot(lsmean,blupval,pch=20, main= "lsm vs blup from all data ")
abline(lm(blupval~lsmean))
cor(lsmean,blupval)
detach(ds2)

###############################################################
## create a data set with only 1 rep per location            ##
###############################################################
attach(ds1)
ds1$rep<-as.character(ds1$rep)
ds3<-subset(ds1,ds1$rep=="2")
detach(ds1)

attach(ds3)
#glm<-(lm(yld~env*line))
#lsm<-lsmeans(glm,~line)
#write.csv(lsm,file="QG_Mod12_ALA12.4_lsm.csv", row.names=FALSE)

mixmodel = lmer(yld ~ env+(1|line)+(1|line:env))
###############################################################
## this model has a problem. there are no df for             ##
## estimating the residual term.  thus gxe has to be removed ##
###############################################################
mixmodel = lmer(yld ~ env+(1|line))
mixmodel
mme<-ranef(mixmodel)
mme
write.csv(mme,file="QG_Mod12_ALA12.4_blupv_half.csv", row.names=FALSE)
detach(ds3)

###############################################################
## edit QG_Mod12_ALA12.4_blupv half.csv 
## by relabeling grp as line and condval as blupval.  
## Next merge the standard lsm data with the blupval data   ###
###############################################################

bluphalf<-read.csv("QG_Mod12_ALA12.4_blupv_half.csv",header=T)
bluphalf$line<-as.character(bluphalf$line)
bluphalf$blupval<-as.numeric(bluphalf$blupval)
summary(bluphalf)

ds4<-merge(leastsqr,bluphalf,by= c("line"),all=TRUE)
attach(ds4)
plot(lsmean,blupval,pch=20, 
     main= "lsm from 2 rep data set vs blup from 1 rep data ")
abline(lm(blupval~lsmean))
cor(lsmean,blupval)
detach(ds4)

###############################################################
## the badly balanced data set with only 
## 1/4 of the original plots                                 ##
###############################################################

ds5<-read.csv("QG_Mod12_ALA12.4_onequarter_ds.csv",header=T)
summary(ds5)
ds5$line<-as.character(ds5$line)
ds5$env<-as.character(ds5$env)
ds5$mscore<-as.numeric(ds5$mscore)
summary(ds5)

attach(ds5)
mixmodel = lmer(yld ~ env+(1|line))
mixmodel
mme<-ranef(mixmodel)
write.csv(mme,file="QG_Mod12_ALA12.4_blupv_quarter.csv", row.names=FALSE)
detach(ds5)

###############################################################
## edit QG_Mod12_ALA12.4_blupv_quarter.csv                  ###
## by relabeling grp as line and condval as blupval. Retain ###
## the variable condsd and relabel it as blupsd so that CI  ###
## can be calculated for each individual line               ###
## Next merge the standard lsm data with the blupval data   ###
###############################################################

blupq<-read.csv("QG_Mod12_ALA12.4_blupv_quarter.csv",header=T)
blupq$line<-as.character(blupq$line)
blupq$blupval<-as.numeric(blupq$blupval)
summary(blupq)

ds6<-merge(leastsqr,blupq,by= c("line"),all=TRUE)
attach(ds6)
plot(lsmean,blupval,pch=20, 
     main= "lsm from 2 rep data set vs blup from only 250 plots ")
abline(lm(blupval~lsmean))
cor(lsmean,blupval)
detach(ds6)