# Hierarchical clustering of the ALA 4.6 translated marker data set.
#
# Part 1 clusters the lines (rows) on their marker scores, colors the
#   dendrogram branches by cluster and the labels by family membership.
# Part 2 ("question 4") transposes the data and clusters the markers.

rm(list = ls())
options(max.print = 1000000)

# Install only the packages that are missing, so that re-running the script
# does not download and reinstall everything each time.
required_packages <- c(
  "tidyverse", "fastcluster", "dendextend", "ape",
  "dplyr", "ggdendro", "ggplot2", "RColorBrewer"
)
is_installed <- vapply(
  required_packages, requireNamespace, logical(1), quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

library(tidyverse)
library(fastcluster)
library(dendextend)
library(ape)
library(dplyr)
library(ggdendro)
library(ggplot2)
library(RColorBrewer)

# make sure the working directory is correct
getwd()
#setwd("C:/Users/aamahama/Desktop/QG")
setwd("C:/Users/aamahama/Documents/PBEA-SIL_Instructor_Guide-Project/PBEA-SIL Project/Instructor Guides/Quantitative Genetics - Instructor Resources/Module 4 Measures of Similarity/ALAs")
#C:\Users\aamahama\Documents\PBEA-SIL_Instructor_Guide-Project\PBEA-SIL Project\Instructor Guides\Quantitative Genetics - Instructor Resources\Module 4 Measures of Similarity\ALAs

# read in the data set; refer to it as the ALA 4.6 translated data set
ALA4.6 <- read.csv("QG_Mod4_ALA4.6 dstranslated no mv.csv", header = TRUE)

# remove rows, i.e., lines with missing data
ALA4.6 <- ALA4.6[complete.cases(ALA4.6), ]
dim(ALA4.6)

# Create subsets: column 1 = family group, column 2 = line label,
# columns 3 to 16 = marker scores.
ALA4.6families <- ALA4.6[, 1]
ALA4.6lines <- ALA4.6[, 2]
ALA4.6scores <- ALA4.6[, 3:16]
rownames(ALA4.6scores) <- ALA4.6[, 2]

# create a matrix of distance scores among all pairs of lines
# (convert the marker scores into a matrix first)
ALA4.6mat <- ALA4.6scores %>% as.matrix()
distance_ALA4.6 <- dist(ALA4.6mat, method = "manhattan")
cluster <- hclust(distance_ALA4.6, method = "ward.D2")
branch6 <- cutree(cluster, 6)
ALA4.6tree <- as.dendrogram(cluster)
#plot(ALA4.6tree, main = "Cluster dendrogram of lines based on genomic data ",
#     sub = NULL, xlab = NULL, ylab = "Height", cex = 0.5)

# Line labels in the left-to-right order of the dendrogram leaves.
labels.ALA4.6.tree <- labels(ALA4.6tree)
labels.ALA4.6.tree

###########################################################
# The dendrogram tells us there are 6 groups.
# We want to know if these 6 groups are related to the family structure.
# To do this we are going to add color to our graphs.
# One set of colors will be used for the branches and
# another set of colors will be used for family membership.
######################################################################

######################################################################
# The first step is to assign family membership to groups.
######################################################################
groupAlines <- as.character(ALA4.6lines[which(ALA4.6families %in% "1")])
groupBlines <- as.character(ALA4.6lines[which(ALA4.6families %in% "2")])
groupClines <- as.character(ALA4.6lines[which(ALA4.6families %in% "3")])
groupDlines <- as.character(ALA4.6lines[which(ALA4.6families %in% "4")])
groupElines <- as.character(ALA4.6lines[which(ALA4.6families %in% "5")])
groupFlines <- as.character(ALA4.6lines[which(ALA4.6families %in% "6")])
groupGlines <- as.character(ALA4.6lines[which(ALA4.6families %in% "7")])
groupHlines <- as.character(ALA4.6lines[which(ALA4.6families %in% "8")])
groupIlines <- as.character(ALA4.6lines[which(ALA4.6families %in% "9")])

#####################################################################
# The second step is to assign labels to family groups.
# Each vector holds the positions, within the dendrogram label order,
# of the lines that belong to one family.
#####################################################################
labelsgroupA.indeces <- which(labels.ALA4.6.tree %in% groupAlines)
labelsgroupB.indeces <- which(labels.ALA4.6.tree %in% groupBlines)
labelsgroupC.indeces <- which(labels.ALA4.6.tree %in% groupClines)
labelsgroupD.indeces <- which(labels.ALA4.6.tree %in% groupDlines)
labelsgroupE.indeces <- which(labels.ALA4.6.tree %in% groupElines)
labelsgroupF.indeces <- which(labels.ALA4.6.tree %in% groupFlines)
labelsgroupG.indeces <- which(labels.ALA4.6.tree %in% groupGlines)
labelsgroupH.indeces <- which(labels.ALA4.6.tree %in% groupHlines)
labelsgroupI.indeces <- which(labels.ALA4.6.tree %in% groupIlines)

#####################################################################
# The third step is to assign colors to the labels.
#####################################################################
#colors()# Use the colors command to help find acceptable colors
#warnings()

# Start from a valid fallback color: a placeholder that is not a color name
# would make the plots fail for any line outside families 1 to 9.
fallback_color <- "darkgreen"
labels.color <- rep(fallback_color, length(labels.ALA4.6.tree))
labels.color[labelsgroupA.indeces] <- "red"
labels.color[labelsgroupB.indeces] <- "blue"
labels.color[labelsgroupC.indeces] <- "yellow"
labels.color[labelsgroupD.indeces] <- "purple"
labels.color[labelsgroupE.indeces] <- "gray"
labels.color[labelsgroupF.indeces] <- "deeppink"
labels.color[labelsgroupG.indeces] <- "black"
labels.color[labelsgroupH.indeces] <- "cyan"
labels.color[labelsgroupI.indeces] <- "orange"

# Report lines that did not fall into any of the nine families.
unassigned_indices <- setdiff(
  seq_along(labels.ALA4.6.tree),
  c(
    labelsgroupA.indeces, labelsgroupB.indeces, labelsgroupC.indeces,
    labelsgroupD.indeces, labelsgroupE.indeces, labelsgroupF.indeces,
    labelsgroupG.indeces, labelsgroupH.indeces, labelsgroupI.indeces
  )
)
if (length(unassigned_indices) > 0) {
  warning(
    length(unassigned_indices),
    " line(s) are not in families 1-9 and are drawn in ", fallback_color,
    call. = FALSE
  )
}

#####################################################################
# The fourth step is to assign colors to the branches.
#####################################################################
branch.color <- c("black", "green", "darkgray", "brown", "lightgray", "bisque2")

# Pass the palette as `value` so the six clusters actually use branch.color.
ALA4.6tree2 <- ALA4.6tree %>%
  set("branches_k_color", value = branch.color, k = 6) %>%
  set("labels_colors", labels.color)

plot(ALA4.6tree2, main = "Cluster dendrogram of lines based on genomic data ",
     sub = NULL, xlab = NULL, ylab = "Height", cex = 0.5)

# labels.color follows the dendrogram's leaf order, but the tips of the
# converted phylo object are not guaranteed to be in that order, so the
# colors are matched to the tips by label before plotting.
ALA4.6phylo <- as.phylo(ALA4.6tree2)
tip_colors <- labels.color[match(ALA4.6phylo$tip.label, labels.ALA4.6.tree)]
plot(ALA4.6phylo, type = "fan", tip.color = tip_colors,
     label.offset = 0.3, cex = 0.5)

# question 4: Transpose the data so that markers are in rows.
# Every column except the first two (family and line label) is treated as a
# marker, which is what dropping the first two rows of the transposed data
# set amounts to. NOTE(review): confirm the file has no non-marker columns
# beyond column 16.
tdata <- t(as.matrix(ALA4.6[, -(1:2), drop = FALSE]))
tdata <- as.data.frame(tdata)
tdata_markers <- rownames(tdata)  # marker names (one per row of tdata)
tdata_ca_mat <- tdata %>% as.matrix()
distance_tdata_ca <- dist(tdata_ca_mat, method = "manhattan")

# NOTE: `cluster` and `branch6` are reused here and from this point on hold
# the marker clustering, not the line clustering from above.
cluster <- hclust(distance_tdata_ca, method = "ward.D2")
branch6 <- cutree(cluster, 6)
tdata_ca_tree <- as.dendrogram(cluster)

# Marker labels in the left-to-right order of the dendrogram leaves.
labels.demo_ca_tree <- labels(tdata_ca_tree)
plot(tdata_ca_tree, main = "Cluster dendrogram of Markers based on genomic data ",
     sub = NULL, xlab = NULL, ylab = "Height", cex = 0.1)

### There are many types of representations of clusters ####