### Consider protein expression data from milk samples of two groups.
### Group A contains low-quality samples, group B contains high-quality samples.
### As an example, two proteins are studied.
### The two proteins are used to train a computer model that predicts
### the quality group of samples with unknown group membership.

### Generation of training data set
### with expression levels for two proteins
### in n=10 low quality and n=10 high quality milk samples.
### Fix the RNG seed so that the simulated data are reproducible.
### NOTE: the order of the rnorm() calls below determines which random
### numbers each vector receives; do not reorder them.
set.seed(123)
protein1.lowq <- rnorm(n = 10, mean = 2.0, sd = 3)
protein1.higq <- rnorm(n = 10, mean = 4.0, sd = 3)
protein2.lowq <- rnorm(n = 10, mean = 2.0, sd = 3)
protein2.higq <- rnorm(n = 10, mean = 4.0, sd = 3)
### Stack the groups: rows 1-10 are low quality, rows 11-20 high quality.
protein1 <- c(protein1.lowq, protein1.higq)
protein2 <- c(protein2.lowq, protein2.higq)
### Training matrix: one row per sample, one column per protein.
x.train <- cbind(protein1 = protein1, protein2 = protein2)

### Generation of test data set
### with expression levels for two proteins
### in n=5 low quality and n=5 high quality milk samples.
### The helper vectors from the training step are reused (overwritten) here.
protein1.lowq <- rnorm(n = 5, mean = 2.0, sd = 0.2)
protein1.higq <- rnorm(n = 5, mean = 2.3, sd = 0.2)
protein2.lowq <- rnorm(n = 5, mean = 4.0, sd = 0.3)
protein2.higq <- rnorm(n = 5, mean = 3.6, sd = 0.3)
### Stack the groups: rows 1-5 are low quality, rows 6-10 high quality.
protein1 <- c(protein1.lowq, protein1.higq)
protein2 <- c(protein2.lowq, protein2.higq)
### Test matrix with the same column layout as x.train.
x.test <- cbind(protein1 = protein1, protein2 = protein2)

####################
### EXERCISE No 10.1
####################
### Training of a LDA-model using the training data.
### Prediction of group-membership for the 10 new samples
### from the test set.
library(MASS)
### True class labels of the training samples, in the row order of x.train:
### 10 x level "1" (low quality) followed by 10 x level "2" (high quality).
group <- gl(2, 10)
### Fit the LDA model on the training data and classify the test samples.
M1 <- lda(x.train, group)
predict(M1, x.test)$class

### Evaluation of model 1 by cross validation (CV)
### With CV = TRUE, lda() returns the leave-one-out predictions in $class.
test.classes.CV <- lda(x.train, group, CV = TRUE)$class
### Confusion matrix: rows = true group, columns = predicted group.
### (Named conf.mat rather than T, because T would mask the TRUE shorthand.)
conf.mat <- table(group, test.classes.CV)

### Performance measures; level "2" (high quality) is the positive class.
### [2,2] = true positives,  [1,1] = true negatives,
### [1,2] = false positives, [2,1] = false negatives.
sum(diag(conf.mat)) / sum(conf.mat)                  ### Accuracy
conf.mat[2, 2] / (conf.mat[2, 1] + conf.mat[2, 2])   ### Sensitivity = TP/(TP+FN)
conf.mat[1, 1] / (conf.mat[1, 1] + conf.mat[1, 2])   ### Specificity = TN/(TN+FP)
conf.mat[2, 2] / (conf.mat[1, 2] + conf.mat[2, 2])   ### PPV = TP/(TP+FP)
conf.mat[1, 1] / (conf.mat[1, 1] + conf.mat[2, 1])   ### NPV = TN/(TN+FN)

### Exact binomial test of the CV accuracy against chance level (p = 0.5).
### The counts are taken from the confusion matrix instead of being hard-coded,
### so the test stays correct when the data or the seed change.
binom.test(sum(diag(conf.mat)), sum(conf.mat))


####################
### EXERCISE No 10.2
####################
### Training of a SVM-model using the training data.
### Prediction of group-membership for 10 new individuals
### from the test set.
library("e1071")
### True class labels of the training samples, in the row order of x.train:
### 10 x level "1" (low quality) followed by 10 x level "2" (high quality).
group <- gl(2, 10)
### Fit the SVM model on the training data and classify the test samples.
M2 <- svm(x.train, group)
predict(M2, x.test)

### Evaluation of SVM-model by cross validation (CV)
### Leave-one-out: for each sample i, train without it and predict only it.
### The predictions are stored as a factor with the same levels as group, so
### the confusion matrix is always 2 x 2, even if one class is never predicted.
test.classes.CV <- factor(rep(NA, length(group)), levels = levels(group))
for (i in seq_along(group)) {
  M.i <- svm(x.train[-i, ], group[-i])
  ### drop = FALSE keeps the held-out sample as a 1-row matrix for predict().
  held.out <- x.train[i, , drop = FALSE]
  test.classes.CV[i] <- as.character(predict(M.i, held.out))
}

### Confusion matrix: rows = true group, columns = predicted group.
### (Named conf.mat rather than T, because T would mask the TRUE shorthand.)
conf.mat <- table(group, test.classes.CV)

### Performance measures; level "2" (high quality) is the positive class.
### [2,2] = true positives,  [1,1] = true negatives,
### [1,2] = false positives, [2,1] = false negatives.
sum(diag(conf.mat)) / sum(conf.mat)                  ### Accuracy
conf.mat[2, 2] / (conf.mat[2, 1] + conf.mat[2, 2])   ### Sensitivity = TP/(TP+FN)
conf.mat[1, 1] / (conf.mat[1, 1] + conf.mat[1, 2])   ### Specificity = TN/(TN+FP)
conf.mat[2, 2] / (conf.mat[1, 2] + conf.mat[2, 2])   ### PPV = TP/(TP+FP)
conf.mat[1, 1] / (conf.mat[1, 1] + conf.mat[2, 1])   ### NPV = TN/(TN+FN)


####################
### EXERCISE No 10.3
####################
# Modify the parameters of the training and test data:
# mean levels, sample sizes, and standard deviations.
# Study the effect of these parameters on the classification performance measures.