Merge ~kub/dpcs:database into dpcs:master

Proposed by kub
Status: Needs review
Proposed branch: ~kub/dpcs:database
Merge into: dpcs:master
Diff against target: 209 lines (+183/-0)
3 files modified
docs/classificationB/ml.Rmd (+75/-0)
docs/classificationB/ml_multinomial.Rmd (+100/-0)
server/database/tables.sql (+8/-0)
Reviewer: UW ML RG Board
Status: Pending
Review via email: mp+288224@code.launchpad.net
~kub/dpcs:database updated
30e7f02... by kub

added classification

Unmerged commits

30e7f02... by kub

added classification

b293be8... by kub

Please enter a commit message to explain why this merge is necessary,

ae72a32... by kub

some tidying

3b2fdc5... by kub

Merge branch 'database' of git+ssh://git.launchpad.net/~kub/dpcs into database

7afc1a4... by kub

simple script to create tables

33d621e... by kub

simple script to create tables

Preview Diff

diff --git a/docs/classificationB/decision_trees_logistic_regression.odt b/docs/classificationB/decision_trees_logistic_regression.odt
new file mode 100644
index 0000000..96144f6
Binary files /dev/null and b/docs/classificationB/decision_trees_logistic_regression.odt differ
diff --git a/docs/classificationB/decision_trees_logistic_regression.pdf b/docs/classificationB/decision_trees_logistic_regression.pdf
new file mode 100644
index 0000000..6f2564e
Binary files /dev/null and b/docs/classificationB/decision_trees_logistic_regression.pdf differ
diff --git a/docs/classificationB/ml.Rmd b/docs/classificationB/ml.Rmd
new file mode 100644
index 0000000..d7e03b8
--- /dev/null
+++ b/docs/classificationB/ml.Rmd
@@ -0,0 +1,75 @@
+---
+title: "ML"
+output: word_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+```{r echo = FALSE, message = FALSE, results="hide"}
+library("glmnet")
+library("tree")
+library("tictoc")
+
+dataSetup <- function(catNumber, trainingExamples, noise, testExamples){
+rule <<- rbinom(catNumber, 1, 0.5)
+xTrain <<- matrix(rbinom(trainingExamples * catNumber, 1, 0.5), trainingExamples, catNumber)
+yTrain <<- apply(xTrain == rule, 1, all)
+noiseMask <<- matrix(rbinom(trainingExamples, 1, noise), trainingExamples, 1)
+yTrain <<- xor(yTrain, noiseMask)
+xTest <<- matrix(rbinom(testExamples * catNumber, 1, 0.5), testExamples, catNumber)
+yTest <<- apply(xTest == rule, 1, all)
+}
+
+logistic <- function(catNumber, trainingExamples, noise, testExamples){
+
+ dataSetup(catNumber, trainingExamples, noise, testExamples)
+ tic()
+ regressionModel <- glmnet(xTrain, yTrain, family = "binomial")
+ regressionPrediction <- predict(regressionModel, newx=xTest, type = "class", s = 1e-3)
+ regressionTime <- toc()
+ regressionError <- sum(!regressionPrediction == yTest) / testExamples
+ regressionTime = regressionTime$toc - regressionTime$tic
+ return(c(regressionError, regressionTime))
+}
+
+tree2 <- function(catNumber, trainingExamples, noise, testExamples){
+
+ dataSetup(catNumber, trainingExamples, noise, testExamples)
+ dfTrain <- data.frame(yTrain, xTrain)
+ dfTest <- data.frame(yTest, xTest)
+ tic()
+ treeFit <- tree('yTrain~.', dfTrain)
+ treePrediction <- predict(treeFit, dfTest, type = "vector")
+ treePrediction <- treePrediction >= 0.5
+ treeError <- sum(!treePrediction == yTest) / testExamples
+ treeTime <- toc()
+ treeTime <- treeTime$toc - treeTime$tic
+ return(c(treeError, treeTime))
+}
+
+#parameters
+catNumber <- 5
+trainingExamples <- 1000
+noise <- 0.3
+testExamples <- 100
+series <- 100
+
+logisticPoints <- logistic(catNumber, trainingExamples, noise, testExamples)
+treePoints <- tree2(catNumber, trainingExamples, noise, testExamples)
+
+for(i in 2:series){#idiotic
+ logisticPoints <- cbind(logisticPoints, logistic(catNumber, trainingExamples, noise, testExamples))
+ treePoints <- cbind(treePoints, tree2(catNumber, trainingExamples, noise, testExamples))
+}
+
+
+plot(logisticPoints[1,], logisticPoints[2,], col = "red", xlim = c(0,1), ylim = c(0,0.3), xlab = "Error", ylab = "Time", main ="blue trees, red logreg")
+par(new = TRUE)
+plot(treePoints[1,], treePoints[2,], col = "blue", xlim = c(0,1), ylim = c(0,0.3), xlab = "", ylab ="")
+par(new = FALSE)
+
+
+```
+
diff --git a/docs/classificationB/ml_multinomial.Rmd b/docs/classificationB/ml_multinomial.Rmd
new file mode 100644
index 0000000..9ddee67
--- /dev/null
+++ b/docs/classificationB/ml_multinomial.Rmd
@@ -0,0 +1,100 @@
+---
+title: "ML"
+output: word_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+```{r echo = FALSE, message = FALSE, results="hide"}
+library("glmnet")
+library("tree")
+library("tictoc")
+
+dataSetup <- function(npred, class.size, nclass){
+
+class.labels <- as.character(1:nclass)
+class.predictors <- replicate(nclass,{
+ sample(1:npred, size=4)
+})
+colnames(class.predictors) <- class.labels
+
+x <- matrix(rbinom(npred*nclass*class.size, 1, 0.05), nrow=(class.size*nclass), ncol = npred) #random x
+y <- rep(class.labels, class.size)
+
+#set appropriate cells in x to 1
+for (label in class.labels){
+ x[y==label, class.predictors[,label]] = 1
+}
+half <- floor(dim(x)[1]/2)
+train <- 1:half
+test <- (half+1):(2*half)
+
+xTrain <<- x[train,]
+yTrain <<- y[train]
+xTest <<- x[test,]
+yTest <<- y[test]
+
+#rule <<- rbinom(catNumber, 1, 0.5)
+#xTrain <<- matrix(rbinom(trainingExamples * catNumber, 1, 0.5), trainingExamples, catNumber)
+#yTrain <<- apply(xTrain == rule, 1, all)
+#noiseMask <<- matrix(rbinom(trainingExamples, 1, noise), trainingExamples, 1)
+#yTrain <<- xor(yTrain, noiseMask)
+#xTest <<- matrix(rbinom(testExamples * catNumber, 1, 0.5), testExamples, catNumber)
+#yTest <<- apply(xTest == rule, 1, all)
+}
+
+logistic <- function(npred, class.size, nclass){
+
+ dataSetup(npred, class.size, nclass)
+ tic()
+ regressionModel <- glmnet(xTrain, yTrain, family = "multinomial")
+ regressionPrediction <- predict(regressionModel, newx=xTest, type = "class", s= 1e-3) #* class.size * nclass) # 1e-8 * catNumber * trainingExamples)
+ regressionTime <- toc()
+ regressionError <- sum(!regressionPrediction == yTest) / (length(yTest))
+ regressionTime = regressionTime$toc - regressionTime$tic
+ return(c(regressionError, regressionTime))
+}
+
+tree2 <- function(npred, class.size, nclass){
+
+ dataSetup(npred, class.size, nclass)
+ dfTrain <- data.frame(yTrain, xTrain)
+ dfTest <- data.frame(yTest, xTest)
+ tic()
+ treeFit <- tree('yTrain~.', dfTrain)
+ treePrediction <- predict(treeFit, dfTest, type = "class")
+ #treePrediction <- treePrediction >= 0.5
+ treeError <- sum(!treePrediction == yTest) / (length(yTest))
+ treeTime <- toc()
+ treeTime <- treeTime$toc - treeTime$tic
+ return(c(treeError, treeTime))
+}
+
+#parameters
+npred <- 20
+class.size <-50
+nclass <- 5
+#catNumber <- 5
+#trainingExamples <- 100
+#noise <- 0.3
+#testExamples <- 100
+series <- 20
+
+logisticPoints <- logistic(npred, class.size, nclass)
+treePoints <- tree2(npred, class.size, nclass)
+
+for(i in 2:series){#idiotic
+ logisticPoints <- cbind(logisticPoints, logistic(npred, class.size, nclass))
+ treePoints <- cbind(treePoints, tree2(npred, class.size, nclass))
+}
+
+
+plot(logisticPoints[1,], logisticPoints[2,], col = "red",xlim = c(0,0.2), ylim = c(0,0.4), xlab = "Error", ylab = "Time", main ="blue trees, red logreg")
+par(new = TRUE)
+plot(treePoints[1,], treePoints[2,], col = "blue",xlim = c(0,0.2), ylim = c(0,0.4), xlab = "", ylab ="")
+par(new = FALSE)
+
+```
+
diff --git a/server/database/tables.sql b/server/database/tables.sql
new file mode 100644
index 0000000..c137897
--- /dev/null
+++ b/server/database/tables.sql
@@ -0,0 +1,8 @@
+DROP DATABASE IF EXISTS crash_fixer;
+CREATE DATABASE crash_fixer;
+\c crash_fixer;
+CREATE TABLE applications(id INT PRIMARY KEY NOT NULL, name VARCHAR(250), version VARCHAR(250));
+CREATE TABLE system_info(id INT PRIMARY KEY, version VARCHAR(250));
+CREATE TABLE solutions(id INT PRIMARY KEY NOT NULL, details TEXT);
+CREATE TABLE crash_groups(id INT PRIMARY KEY NOT NULL, solution_id INT REFERENCES solutions(id));
+CREATE TABLE crash_reports(id INT PRIMARY KEY NOT NULL, url VARCHAR(250), crash_group_id INT REFERENCES crash_groups(id), application_id INT REFERENCES applications(id), system_info_id INT REFERENCES system_info(id), exit_code INT, stderr_output TEXT);

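To illustrate how the proposed schema fits together, here is a minimal usage sketch. It is not part of the diff; it only assumes the tables defined in server/database/tables.sql above, and every sample value (application name, URL, exit code, and so on) is invented for illustration.

-- Hypothetical sample data exercising the new tables.
INSERT INTO applications (id, name, version) VALUES (1, 'example-app', '1.0');
INSERT INTO system_info (id, version) VALUES (1, 'example-os 16.04');
INSERT INTO solutions (id, details) VALUES (1, 'Example fix: reinstall the package.');
INSERT INTO crash_groups (id, solution_id) VALUES (1, 1);
INSERT INTO crash_reports (id, url, crash_group_id, application_id, system_info_id, exit_code, stderr_output)
  VALUES (1, 'http://example.com/report/1', 1, 1, 1, 139, 'segmentation fault');

-- Look up the suggested solution for a given crash report
-- by following crash_reports -> crash_groups -> solutions.
SELECT r.id AS report_id, a.name AS application, s.details AS solution
  FROM crash_reports r
  JOIN applications a ON a.id = r.application_id
  JOIN crash_groups g ON g.id = r.crash_group_id
  JOIN solutions s ON s.id = g.solution_id
 WHERE r.id = 1;

Note that the id columns are plain INT rather than SERIAL, so identifiers have to be supplied by the caller in this sketch.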
Subscribers

People subscribed via source and target branches
