## Hierarchical clustering of the MeanMeanEarPhenotypes dataset.
##
## Reads MeanMeanEarPhenotypes.csv, builds Euclidean distance matrices from
## all retained columns or from selected columns, and draws one dendrogram per
## linkage method / variable subset, with leaves labelled by data$Pedigree.
##
## Inspection output is wrapped in print() so it is also shown when the script
## is run through source(), which does not auto-print top-level values.

## load packages
library("cluster")

# set working directory for module and check
# NOTE(review): machine-specific absolute path; the relative read.csv() path
# below depends on it, so adjust it before running on another machine.
setwd("C:/Users/ufrei/Desktop/WD-module")
print(getwd())

# load the MeanMeanEarPhenotypes dataset and look at the structure
data <- read.csv("MeanMeanEarPhenotypes.csv", header = TRUE)
str(data)

# drop the columns from Pedigree through Type by subsetting; the remaining
# columns are the input for the distance matrix
datap <- subset(data, select = -(Pedigree:Type))
print(head(datap))

###### Generating the distance matrix ----

# calculate a Euclidean distance matrix and display it rounded to three
# digits; Dist itself keeps full precision so clustering is not affected by
# the rounding
Dist <- dist(datap, method = "euclidean")
print(round(Dist, digits = 3))

#### Hierarchical clustering using different methods ----
#### single, complete, average (= UPGMA)

x <- hclust(Dist, method = "single")
plot(x, labels = data$Pedigree, hang = -0.1,
     main = "Hierarchical Clustering using single")

x <- hclust(Dist, method = "complete")
plot(x, labels = data$Pedigree, hang = -0.1,
     main = "Hierarchical Clustering using complete")

x <- hclust(Dist, method = "average")
plot(x, labels = data$Pedigree, hang = -0.1,
     main = "Hierarchical Clustering using average")

##### Playing around by selecting variables from the original dataset ----
# Each section below recomputes Dist from a subset of columns of `data` and
# re-clusters with average linkage.

print(head(datap))

# EarL only
EarL <- subset(data, select = EarL)
Dist <- dist(EarL, method = "euclidean")
print(round(Dist, digits = 3))
x <- hclust(Dist, method = "average")
plot(x, labels = data$Pedigree, hang = -0.1,
     main = "Hierarchical Clustering based on EarL")

# ca300KWt only
ca300KWt <- subset(data, select = ca300KWt)
Dist <- dist(ca300KWt, method = "euclidean")
print(round(Dist, digits = 3))
x <- hclust(Dist, method = "average")
plot(x, labels = data$Pedigree, hang = -0.1,
     main = "Hierarchical Clustering based on ca300KWt")

# RowNo. only (the column name in the data ends with a dot)
RowNo <- subset(data, select = RowNo.)
Dist <- dist(RowNo, method = "euclidean")
print(round(Dist, digits = 3))
x <- hclust(Dist, method = "average")
plot(x, labels = data$Pedigree, hang = -0.1,
     main = "Hierarchical Clustering based on RowNo")

# EarDT only
EarDT <- subset(data, select = EarDT)
Dist <- dist(EarDT, method = "euclidean")
print(round(Dist, digits = 3))
x <- hclust(Dist, method = "average")
plot(x, labels = data$Pedigree, hang = -0.1,
     main = "Hierarchical Clustering based on EarDT")

# all columns in the range EarL through CobDT
Cobdim <- subset(data, select = EarL:CobDT)
Dist <- dist(Cobdim, method = "euclidean")
print(round(Dist, digits = 3))
x <- hclust(Dist, method = "average")
plot(x, labels = data$Pedigree, hang = -0.1,
     main = "Hierarchical Clustering based on cob dimensions")

# hand-picked columns: RowNo., EarL and CobWt
Cob <- subset(data, select = c("RowNo.", "EarL", "CobWt"))
Dist <- dist(Cob, method = "euclidean")
print(round(Dist, digits = 3))
x <- hclust(Dist, method = "average")
plot(x, labels = data$Pedigree, hang = -0.1,
     main = "Hierarchical Clustering based on selected variables")