op = par(no.readonly=TRUE)   # save graphics settings; no.readonly avoids warnings when restoring

##################
### Exercise 9 ###
##################

# Data Example:
# Consider the proteome of porcine muscle samples directly
# after slaughtering (Time 0) and 24 hours thereafter,
# in order to study changes in the meat quality.
#
# Import the data of 753 proteins in 40 samples:
setwd("G:\\tiergenomik\\AG_bioinf\\teaching\\Master FPPE\\DataExamples")
Y0 = read.csv2("MeatProteomics.csv")
head(Y0)
proteins = Y0[,1]               # first column holds the protein identifiers
Y = data.matrix(Y0[,2:41])      # expression matrix: 753 proteins x 40 samples
dim(Y)
head(Y)
d = nrow(Y)                     # number of proteins

### Exercise 9.1 - k-Means Clustering
#####################################
# Use only data of proteins 10 and 11 (or try other pairs).
# Do k-means clustering with k=2, k=3, k=...
# Visualize the results.
set.seed(124)
M = kmeans(t(Y[10:11,]), centers=2)
M$cluster
plot(Y[10,], Y[11,], cex.axis=1.5, cex.lab=1.5,
     xlab="Protein 10", ylab="Protein 11",
     col=M$cluster, lwd=3, main="k=2", cex.main=1.5)

### Exercise 9.2 a) - Dimension reduction with PCA
##################################################
# Run a PCA to reduce the dimensions of the full data set.
# Determine percentages of variance explained by
# each principal component.
# Draw the PCA plot. Add percentages to the axis titles.
pca0 = prcomp(t(Y))
perc = round(100 * pca0$sdev^2 / sum(pca0$sdev^2), 1)   # variance explained = sdev^2, not sdev
perc
pca = predict(pca0)             # matrix of principal component scores
plot(pca[,1], pca[,2],
     xlab=paste0("PC1 (", perc[1], "%)"),
     ylab=paste0("PC2 (", perc[2], "%)"))

# Run k-means clustering again, now using all proteins.
# Use different numbers of centers (k=2, k=3, ...).
# Determine the ratio of variability within and between clusters.
# Which choice of k would you make?
# Color-code the clusters in the PCA plot.
M = kmeans(t(Y), centers=3)
M$tot.withinss / M$betweenss    # within/between ratio (the kmeans slot is 'betweenss')
plot(pca[,1], pca[,2],
     xlab=paste0("PC1 (", perc[1], "%)"),
     ylab=paste0("PC2 (", perc[2], "%)"),
     col=M$cluster)
library(scatterplot3d)
scatterplot3d(pca[,1], pca[,2], pca[,3], color=M$cluster)
par(op)

### Exercise 9.2 b) - Dimension reduction with t-SNE and UMAP
#############################################################
# Run t-SNE to reduce the dimensions of the full data set.
# Draw the t-SNE plot and color-code clusters determined by
# k-means clustering.
library(Rtsne)
set.seed(124)                   # t-SNE is stochastic; fix the seed for reproducibility
tsne = Rtsne(t(Y), dims=2, perplexity=10)$Y   # avoid 'T' as a name (shorthand for TRUE)
plot(tsne[,1], tsne[,2], col=M$cluster)

# Run UMAP to reduce the dimensions of the full data set.
# Draw the UMAP plot and color-code clusters determined by
# k-means clustering.
library(umap)
U = umap(t(Y))$layout
plot(U[,1], U[,2], col=M$cluster)

### Exercise 9.3 - Hierarchical clustering
##########################################
# Calculate the distance matrix representing the distances
# between each pair of the 40 samples.
# Cluster the samples hierarchically using different methods
# (complete, single, ward.D, ...).
D = dist(t(Y))
H = hclust(D, method="complete")
plot(H, labels=FALSE, hang=-1)

### Use heatmaps to visualize clustering of rows and columns.
# Use different clustering methods for rows and columns.
# Split the heatmap according to larger clusters.
library(ComplexHeatmap)
Heatmap(Y,
        clustering_method_rows="complete",
        clustering_method_columns="ward.D",
        column_split=3)
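
### Addendum to Exercise 9.1: repeat with k=3 ################################
# The exercise asks for k=2, k=3, ...; only k=2 is shown above. A minimal
# sketch for k=3 on the same pair of proteins (the object name 'M3' is
# introduced here for illustration; any other k works the same way):
M3 = kmeans(t(Y[10:11,]), centers=3)
plot(Y[10,], Y[11,], cex.axis=1.5, cex.lab=1.5,
     xlab="Protein 10", ylab="Protein 11",
     col=M3$cluster, lwd=3, main="k=3", cex.main=1.5)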
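
### Addendum to Exercise 9.2 a): scree plot ##################################
# Optional visual check of the variance percentages computed above ('perc').
# Showing only the first 10 components is an arbitrary choice for readability.
barplot(perc[1:10], names.arg=paste0("PC", 1:10),
        ylab="Variance explained (%)", main="Scree plot")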
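
### Addendum to Exercise 9.2 a): choosing k with an elbow plot ###############
# The within/between ratio above is computed for a single k. One common way
# to compare several k at once is to plot the total within-cluster sum of
# squares against k and look for an "elbow". The range k=2..8 and nstart=10
# are illustrative choices, not prescribed by the exercise.
set.seed(124)
wss = sapply(2:8, function(k) kmeans(t(Y), centers=k, nstart=10)$tot.withinss)
plot(2:8, wss, type="b", xlab="Number of clusters k",
     ylab="Total within-cluster sum of squares", main="Elbow plot")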
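
### Addendum to Exercise 9.3: compare linkage methods ########################
# A sketch drawing the dendrograms for the three methods named in the
# exercise side by side (reuses the distance matrix D computed above).
op2 = par(mfrow=c(1,3))
for (m in c("complete", "single", "ward.D")) {
  plot(hclust(D, method=m), labels=FALSE, hang=-1, main=m, xlab="", sub="")
}
par(op2)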
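
### Addendum to Exercise 9.3: row-scaled heatmap (sketch) ####################
# Protein abundances can differ strongly in scale, so a few high-abundance
# proteins may dominate the raw heatmap. Scaling each protein (row) to
# mean 0 / sd 1 before plotting is a common optional step; whether it is
# appropriate here is an assumption about the data, not part of the exercise.
Yz = t(scale(t(Y)))             # z-score each protein across the 40 samples
Heatmap(Yz, name="z-score",
        clustering_method_rows="complete",
        clustering_method_columns="ward.D",
        column_split=3, show_row_names=FALSE)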