### Consider protein expression data from milk samples of two groups.
### Group A are low quality samples, group B are high quality samples.
### As an example, two proteins are studied.
### The two proteins shall be used to train a computer model to predict
### the quality group for samples with unknown group membership.

### Generation of training data set
### with expression levels for two proteins
### in n=10 low quality and n=10 high quality milk samples.
### The order of the rnorm() calls is kept fixed so that the random
### number stream (and therefore every simulated value) is reproducible.
set.seed(123)
protein1.lowq = rnorm(10, 2.0, 3)
protein1.higq = rnorm(10, 4.0, 3)
protein2.lowq = rnorm(10, 2.0, 3)
protein2.higq = rnorm(10, 4.0, 3)
protein1 = c(protein1.lowq, protein1.higq)
protein2 = c(protein2.lowq, protein2.higq)
x.train = cbind(protein1, protein2)

### Generation of test data set
### with expression levels for two proteins
### in n=5 low quality and n=5 high quality milk samples.
protein1.lowq = rnorm(5, 2.0, 0.2)
protein1.higq = rnorm(5, 2.3, 0.2)
protein2.lowq = rnorm(5, 4.0, 0.3)
protein2.higq = rnorm(5, 3.6, 0.3)
protein1 = c(protein1.lowq, protein1.higq)
protein2 = c(protein2.lowq, protein2.higq)
x.test = cbind(protein1, protein2)

####################
### EXERCISE No 10.1
####################

### Training of a LDA-model using the training data.
### Prediction of group-membership for the 10 new samples
### from the test set.
library(MASS)
### Level 1 = low quality (first 10 rows), level 2 = high quality (last 10 rows).
group = gl(2, 10)
M1 = lda(x.train, group)
predict(M1, x.test)$class

### Evaluation of model 1 by cross validation (CV)
test.classes.CV = lda(x.train, group, CV = TRUE)$class

### Confusion matrix: rows = true group, columns = predicted group.
### (Not stored in `T`, which would mask the shorthand for TRUE.)
conf.mat = table(group, test.classes.CV)

### Group 2 (high quality) is treated as the "positive" class.
TN = conf.mat[1, 1]   ### true low quality,  predicted low quality
FP = conf.mat[1, 2]   ### true low quality,  predicted high quality
FN = conf.mat[2, 1]   ### true high quality, predicted low quality
TP = conf.mat[2, 2]   ### true high quality, predicted high quality

(TP + TN) / sum(conf.mat)   ### Accuracy
TP / (TP + FN)              ### Sensitivity (share of true positives found)
TN / (TN + FP)              ### Specificity (share of true negatives found)
TP / (TP + FP)              ### PPV (share of positive calls that are right)
TN / (TN + FN)              ### NPV (share of negative calls that are right)

### Test of the CV accuracy against chance level (p = 0.5), using the
### observed number of correct classifications instead of fixed counts.
binom.test(TP + TN, sum(conf.mat))

####################
### EXERCISE No 10.2
####################

### Training of a SVM-model using the training data.
### Prediction of group-membership for 10 new individuals
### from the test set.
library("e1071")
### Level 1 = low quality (first 10 rows), level 2 = high quality (last 10 rows).
group = gl(2, 10)
M2 = svm(x.train, group)
predict(M2, x.test)

### Evaluation of SVM-model by leave-one-out cross validation (CV):
### each sample is predicted by a model trained on all other samples.
n.train = nrow(x.train)
### A factor with fixed levels keeps the confusion matrix 2 x 2 even if
### one class is never predicted.
test.classes.CV = factor(rep(NA, n.train), levels = levels(group))
for (i in seq_len(n.train)) {
  M.i = svm(x.train[-i, ], group[-i])
  ### Predict only the held-out sample; drop = FALSE keeps it a 1-row matrix.
  test.classes.CV[i] = predict(M.i, x.train[i, , drop = FALSE])
}

### Confusion matrix: rows = true group, columns = predicted group.
### (Not stored in `T`, which would mask the shorthand for TRUE.)
conf.mat = table(group, test.classes.CV)

### Group 2 (high quality) is treated as the "positive" class.
TN = conf.mat[1, 1]   ### true low quality,  predicted low quality
FP = conf.mat[1, 2]   ### true low quality,  predicted high quality
FN = conf.mat[2, 1]   ### true high quality, predicted low quality
TP = conf.mat[2, 2]   ### true high quality, predicted high quality

(TP + TN) / sum(conf.mat)   ### Accuracy
TP / (TP + FN)              ### Sensitivity (share of true positives found)
TN / (TN + FP)              ### Specificity (share of true negatives found)
TP / (TP + FP)              ### PPV (share of positive calls that are right)
TN / (TN + FN)              ### NPV (share of negative calls that are right)

####################
### EXERCISE No 10.3
####################

# Modify parameters of the training and test data:
# Mean level, sample sizes, standard deviations
# Study the effect of these parameters on the classification performance measures.