## load packages library("cluster") library("clusterSim") # set working directory for module and check setwd("C:/Users/ufrei/Desktop/WD-module") getwd() #load the MeanMeanEarPhenotypes dataset and look at the structure data <- read.csv("MeanMeanEarPhenotypes.csv", header = T) str(data) #subestting dataset datap <- subset(data, select=c("GrWt", "ca300KWt")) PCAdata <-data.Normalization(datap, type="n1") head(PCAdata) #display data in a plot plot(PCAdata[c("GrWt", "ca300KWt")], col=data$Type, pch=16, main="Distribution of data for PCA example") legend("bottomright", c("Inbred", "F1", "F2", "BC1"), pch = 16, col=c("blue","red","green","black")) # find Eigen values of covariance matrix my.cov <-cov(PCAdata) my.eigen <-eigen(my.cov) my.eigen$values rownames(my.eigen$vectors) <- c("GrWt", "ca300KWt") colnames(my.eigen$vectors) <-c("PC1", "PC2") my.eigen$vectors sum(my.eigen$values) var(PCAdata$GrWt) + var(PCAdata$ca300KWt) #determine the slope of the PRincipal Component PC1.slope <- my.eigen$vectors[1,1]/my.eigen$vectors[2,1] PC2.slope <- my.eigen$vectors[1,2]/my.eigen$vectors[2,2] # overlay the graphic with the Eigen vectors abline(0, PC1.slope, col="green") abline(0, PC2.slope, col="red") # how much of the variation is explained by the principal components? PC1.var <-100 * (my.eigen$values[1]/sum(my.eigen$values)) PC2.var <-100 * (my.eigen$values[2]/sum(my.eigen$values)) PC1.var PC2.var scores <- (PCAdata * my.eigen$vectors) head(scores) xlab <- c("PC1, 81% of variance") ylab <- c("PC2, 19% of variance") plot(scores, ylim=c(-2,2), main="Data in terms of Eigen Vecotrs", xlab=xlab, ylab=ylab, col=data$Type, pch=16 ) legend("bottomright", c("Inbred", "F1", "F2", "BC1"), pch = 16, col=c("blue","red","green","black")) abline(0,0,col="green") abline(0,9000,col="red") # biplot arrows(0,0, my.eigen$vectors[,1]*sqrt(my.eigen$values[1]), my.eigen$vectors[,2]*sqrt(my.eigen$values[2]), length = 0.2) text(x=-1, y=.5, labels="GrWt") text(x=-1, y=-.5, labels="ca300KWt")