op = par(no.readonly=TRUE)   # save graphics settings; no.readonly avoids warnings when restoring

##################
### Exercise 9 ###
##################

# Data Example:
# Consider the proteome of porcine muscle samples directly
# after slaughtering (Time 0) and 24 hours thereafter,
# in order to study changes in the meat quality.
#
# Import the data of 753 proteins in 40 samples:
setwd("G:\\tiergenomik\\AG_bioinf\\teaching\\Master FPPE\\DataExamples")
Y0 = read.csv2("MeatProteomics.csv")
head(Y0)
proteins = Y0[,1]               # first column holds the protein identifiers
Y = data.matrix(Y0[,2:41])      # expression matrix: 753 proteins x 40 samples
dim(Y)
head(Y)
d = nrow(Y)                     # number of proteins

### Exercise 9.1 - k-Means Clustering
#####################################
# Use only data of proteins 10 and 11 (or try other pairs).
# Do k-means clustering with k=2, k=3, k=...
# Visualize the results.
set.seed(124)
M = kmeans(t(Y[10:11,]), centers=2)
M$cluster
plot(Y[10,], Y[11,], cex.axis=1.5, cex.lab=1.5,
     xlab="Protein 10", ylab="Protein 11",
     col=M$cluster, lwd=3, main="k=2", cex.main=1.5)

### Exercise 9.2 a) - Dimension reduction with PCA
##################################################
# Run a PCA to reduce the dimensions of the full data set.
# Determine percentages of variance explained by
# each principal component.
# Draw the PCA plot. Add percentages to the axis titles.
pca0 = prcomp(t(Y))
perc = round(100 * pca0$sdev^2 / sum(pca0$sdev^2), 1)   # variance explained = sdev^2, not sdev
perc
pca = predict(pca0)             # matrix of principal component scores
plot(pca[,1], pca[,2],
     xlab=paste0("PC1 (", perc[1], "%)"),
     ylab=paste0("PC2 (", perc[2], "%)"))

# Run k-means clustering again, now using all proteins.
# Use different numbers of centers (k=2, k=3, ...).
# Determine the ratio of variability within and between clusters.
# Which choice of k would you make?
# Color-code the clusters in the PCA plot.
M = kmeans(t(Y), centers=3)
M$tot.withinss / M$betweenss    # within/between ratio (the kmeans slot is 'betweenss')
plot(pca[,1], pca[,2],
     xlab=paste0("PC1 (", perc[1], "%)"),
     ylab=paste0("PC2 (", perc[2], "%)"),
     col=M$cluster)
library(scatterplot3d)
scatterplot3d(pca[,1], pca[,2], pca[,3], color=M$cluster)
par(op)

### Exercise 9.2 b) - Dimension reduction with t-SNE and UMAP
#############################################################
# Run t-SNE to reduce the dimensions of the full data set.
# Draw the t-SNE plot and color-code clusters determined by
# k-means clustering.
library(Rtsne)
set.seed(124)                   # t-SNE is stochastic; fix the seed for reproducibility
tsne = Rtsne(t(Y), dims=2, perplexity=10)$Y   # avoid 'T' as a name (shorthand for TRUE)
plot(tsne[,1], tsne[,2], col=M$cluster)

# Run UMAP to reduce the dimensions of the full data set.
# Draw the UMAP plot and color-code clusters determined by
# k-means clustering.
library(umap)
U = umap(t(Y))$layout
plot(U[,1], U[,2], col=M$cluster)

### Exercise 9.3 - Hierarchical clustering
##########################################
# Calculate the distance matrix representing the distances
# between each pair of the 40 samples.
# Cluster the samples hierarchically using different methods
# (complete, single, ward.D, ...).
D = dist(t(Y))
H = hclust(D, method="complete")
plot(H, labels=FALSE, hang=-1)

### Use heatmaps to visualize clustering of rows and columns.
# Use different clustering methods for rows and columns.
# Split the heatmap according to larger clusters.
library(ComplexHeatmap)
Heatmap(Y,
        clustering_method_rows="complete",
        clustering_method_columns="ward.D",
        column_split=3)
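
### Addendum to Exercise 9.1: repeat with k=3 ################################
# The exercise asks for k=2, k=3, ...; only k=2 is shown above. A minimal
# sketch for k=3 on the same pair of proteins (the object name 'M3' is
# introduced here for illustration; any other k works the same way):
M3 = kmeans(t(Y[10:11,]), centers=3)
plot(Y[10,], Y[11,], cex.axis=1.5, cex.lab=1.5,
     xlab="Protein 10", ylab="Protein 11",
     col=M3$cluster, lwd=3, main="k=3", cex.main=1.5)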
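
### Addendum to Exercise 9.2 a): scree plot ##################################
# Optional visual check of the variance percentages computed above ('perc').
# Showing only the first 10 components is an arbitrary choice for readability.
barplot(perc[1:10], names.arg=paste0("PC", 1:10),
        ylab="Variance explained (%)", main="Scree plot")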
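
### Addendum to Exercise 9.2 a): choosing k with an elbow plot ###############
# The within/between ratio above is computed for a single k. One common way
# to compare several k at once is to plot the total within-cluster sum of
# squares against k and look for an "elbow". The range k=2..8 and nstart=10
# are illustrative choices, not prescribed by the exercise.
set.seed(124)
wss = sapply(2:8, function(k) kmeans(t(Y), centers=k, nstart=10)$tot.withinss)
plot(2:8, wss, type="b", xlab="Number of clusters k",
     ylab="Total within-cluster sum of squares", main="Elbow plot")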
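
### Addendum to Exercise 9.3: compare linkage methods ########################
# A sketch drawing the dendrograms for the three methods named in the
# exercise side by side (reuses the distance matrix D computed above).
op2 = par(mfrow=c(1,3))
for (m in c("complete", "single", "ward.D")) {
  plot(hclust(D, method=m), labels=FALSE, hang=-1, main=m, xlab="", sub="")
}
par(op2)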
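
### Addendum to Exercise 9.3: row-scaled heatmap (sketch) ####################
# Protein abundances can differ strongly in scale, so a few high-abundance
# proteins may dominate the raw heatmap. Scaling each protein (row) to
# mean 0 / sd 1 before plotting is a common optional step; whether it is
# appropriate here is an assumption about the data, not part of the exercise.
Yz = t(scale(t(Y)))             # z-score each protein across the 40 samples
Heatmap(Yz, name="z-score",
        clustering_method_rows="complete",
        clustering_method_columns="ward.D",
        column_split=3, show_row_names=FALSE)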