Merge ~kub/dpcs:database into dpcs:master

Proposed by kub
Status: Needs review
Proposed branch: ~kub/dpcs:database
Merge into: dpcs:master
Diff against target: 209 lines (+183/-0)
3 files modified
docs/classificationB/ml.Rmd (+75/-0)
docs/classificationB/ml_multinomial.Rmd (+100/-0)
server/database/tables.sql (+8/-0)
Reviewer          Review Type    Date Requested    Status
UW ML RG Board                                     Pending
Review via email: mp+288224@code.launchpad.net
~kub/dpcs:database updated
30e7f02... by kub

added classification

Unmerged commits

30e7f02... by kub

added classification

b293be8... by kub

er a commit message to explain why this merge is necessary,

ae72a32... by kub

some tidying

3b2fdc5... by kub

Merge branch 'database' of git+ssh://git.launchpad.net/~kub/dpcs into database

7afc1a4... by kub

simple script to create tables

33d621e... by kub

simple script to create tables

Preview Diff

1diff --git a/docs/classificationB/decision_trees_logistic_regression.odt b/docs/classificationB/decision_trees_logistic_regression.odt
2new file mode 100644
3index 0000000..96144f6
4Binary files /dev/null and b/docs/classificationB/decision_trees_logistic_regression.odt differ
5diff --git a/docs/classificationB/decision_trees_logistic_regression.pdf b/docs/classificationB/decision_trees_logistic_regression.pdf
6new file mode 100644
7index 0000000..6f2564e
8Binary files /dev/null and b/docs/classificationB/decision_trees_logistic_regression.pdf differ
9diff --git a/docs/classificationB/ml.Rmd b/docs/classificationB/ml.Rmd
10new file mode 100644
11index 0000000..d7e03b8
12--- /dev/null
13+++ b/docs/classificationB/ml.Rmd
14@@ -0,0 +1,75 @@
15+---
16+title: "ML"
17+output: word_document
18+---
19+
20+```{r setup, include=FALSE}
21+knitr::opts_chunk$set(echo = TRUE)
22+```
23+
24+```{r echo = FALSE, message = FALSE, results="hide"}
25+library("glmnet")
26+library("tree")
27+library("tictoc")
28+
29+dataSetup <- function(catNumber, trainingExamples, noise, testExamples){
30+rule <<- rbinom(catNumber, 1, 0.5)
31+xTrain <<- matrix(rbinom(trainingExamples * catNumber, 1, 0.5), trainingExamples, catNumber)
32+yTrain <<- apply(xTrain == rule, 1, all)
33+noiseMask <<- matrix(rbinom(trainingExamples, 1, noise), trainingExamples, 1)
34+yTrain <<- xor(yTrain, noiseMask)
35+xTest <<- matrix(rbinom(testExamples * catNumber, 1, 0.5), testExamples, catNumber)
36+yTest <<- apply(xTest == rule, 1, all)
37+}
38+
39+logistic <- function(catNumber, trainingExamples, noise, testExamples){
40+
41+ dataSetup(catNumber, trainingExamples, noise, testExamples)
42+ tic()
43+ regressionModel <- glmnet(xTrain, yTrain, family = "binomial")
44+ regressionPrediction <- predict(regressionModel, newx=xTest, type = "class", s = 1e-3)
45+ regressionTime <- toc()
46+ regressionError <- sum(!regressionPrediction == yTest) / testExamples
47+ regressionTime = regressionTime$toc - regressionTime$tic
48+ return(c(regressionError, regressionTime))
49+}
50+
51+tree2 <- function(catNumber, trainingExamples, noise, testExamples){
52+
53+ dataSetup(catNumber, trainingExamples, noise, testExamples)
54+ dfTrain <- data.frame(yTrain, xTrain)
55+ dfTest <- data.frame(yTest, xTest)
56+ tic()
57+ treeFit <- tree('yTrain~.', dfTrain)
58+ treePrediction <- predict(treeFit, dfTest, type = "vector")
59+ treePrediction <- treePrediction >= 0.5
60+ treeError <- sum(!treePrediction == yTest) / testExamples
61+ treeTime <- toc()
62+ treeTime <- treeTime$toc - treeTime$tic
63+ return(c(treeError, treeTime))
64+}
65+
66+#parameters
67+catNumber <- 5
68+trainingExamples <- 1000
69+noise <- 0.3
70+testExamples <- 100
71+series <- 100
72+
73+logisticPoints <- logistic(catNumber, trainingExamples, noise, testExamples)
74+treePoints <- tree2(catNumber, trainingExamples, noise, testExamples)
75+
76+for(i in 2:series){#idiotic
77+ logisticPoints <- cbind(logisticPoints, logistic(catNumber, trainingExamples, noise, testExamples))
78+ treePoints <- cbind(treePoints, tree2(catNumber, trainingExamples, noise, testExamples))
79+}
80+
81+
82+plot(logisticPoints[1,], logisticPoints[2,], col = "red", xlim = c(0,1), ylim = c(0,0.3), xlab = "Error", ylab = "Time", main ="blue trees, red logreg")
83+par(new = TRUE)
84+plot(treePoints[1,], treePoints[2,], col = "blue", xlim = c(0,1), ylim = c(0,0.3), xlab = "", ylab ="")
85+par(new = FALSE)
86+
87+
88+```
89+
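Not part of the proposed diff, just a sketch of a possible simplification: the cbind-in-a-loop above (flagged "#idiotic" in the author's own comment) could be replaced with replicate, which returns the same 2 x series matrix of (error, time) pairs, so the plotting code after it stays unchanged. A minimal sketch, assuming the logistic() and tree2() functions defined in this chunk:

    # collect series repetitions of c(error, time) as a 2 x series matrix
    logisticPoints <- replicate(series, logistic(catNumber, trainingExamples, noise, testExamples))
    treePoints <- replicate(series, tree2(catNumber, trainingExamples, noise, testExamples))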
90diff --git a/docs/classificationB/ml_multinomial.Rmd b/docs/classificationB/ml_multinomial.Rmd
91new file mode 100644
92index 0000000..9ddee67
93--- /dev/null
94+++ b/docs/classificationB/ml_multinomial.Rmd
95@@ -0,0 +1,100 @@
96+---
97+title: "ML"
98+output: word_document
99+---
100+
101+```{r setup, include=FALSE}
102+knitr::opts_chunk$set(echo = TRUE)
103+```
104+
105+```{r echo = FALSE, message = FALSE, results="hide"}
106+library("glmnet")
107+library("tree")
108+library("tictoc")
109+
110+dataSetup <- function(npred, class.size, nclass){
111+
112+class.labels <- as.character(1:nclass)
113+class.predictors <- replicate(nclass,{
114+ sample(1:npred, size=4)
115+})
116+colnames(class.predictors) <- class.labels
117+
118+x <- matrix(rbinom(npred*nclass*class.size, 1, 0.05), nrow=(class.size*nclass), ncol = npred) #random x
119+y <- rep(class.labels, class.size)
120+
121+#set appriopriate cells in x to 1
122+for (label in class.labels){
123+ x[y==label, class.predictors[,label]] = 1
124+}
125+half <- floor(dim(x)[1]/2)
126+train <- 1:half
127+test <- (half+1):(2*half)
128+
129+xTrain <<- x[train,]
130+yTrain <<- y[train]
131+xTest <<- x[test,]
132+yTest <<- y[test]
133+
134+#rule <<- rbinom(catNumber, 1, 0.5)
135+#xTrain <<- matrix(rbinom(trainingExamples * catNumber, 1, 0.5), trainingExamples, catNumber)
136+#yTrain <<- apply(xTrain == rule, 1, all)
137+#noiseMask <<- matrix(rbinom(trainingExamples, 1, noise), trainingExamples, 1)
138+#yTrain <<- xor(yTrain, noiseMask)
139+#xTest <<- matrix(rbinom(testExamples * catNumber, 1, 0.5), testExamples, catNumber)
140+#yTest <<- apply(xTest == rule, 1, all)
141+}
142+
143+logistic <- function(npred, class.size, nclass){
144+
145+ dataSetup(npred, class.size, nclass)
146+ tic()
147+ regressionModel <- glmnet(xTrain, yTrain, family = "multinomial")
148+ regressionPrediction <- predict(regressionModel, newx=xTest, type = "class", s= 1e-3) #* class.size * nclass) # 1e-8 * catNumber * trainingExamples)
149+ regressionTime <- toc()
150+ regressionError <- sum(!regressionPrediction == yTest) / (length(yTest))
151+ regressionTime = regressionTime$toc - regressionTime$tic
152+ return(c(regressionError, regressionTime))
153+}
154+
155+tree2 <- function(npred, class.size, nclass){
156+
157+ dataSetup(npred, class.size, nclass)
158+ dfTrain <- data.frame(yTrain, xTrain)
159+ dfTest <- data.frame(yTest, xTest)
160+ tic()
161+ treeFit <- tree('yTrain~.', dfTrain)
162+ treePrediction <- predict(treeFit, dfTest, type = "class")
163+ #treePrediction <- treePrediction >= 0.5
164+ treeError <- sum(!treePrediction == yTest) / (length(yTest))
165+ treeTime <- toc()
166+ treeTime <- treeTime$toc - treeTime$tic
167+ return(c(treeError, treeTime))
168+}
169+
170+#parameters
171+npred <- 20
172+class.size <-50
173+nclass <- 5
174+#catNumber <- 5
175+#trainingExamples <- 100
176+#noise <- 0.3
177+#testExamples <- 100
178+series <- 20
179+
180+logisticPoints <- logistic(npred, class.size, nclass)
181+treePoints <- tree2(npred, class.size, nclass)
182+
183+for(i in 2:series){#idiotic
184+ logisticPoints <- cbind(logisticPoints, logistic(npred, class.size, nclass))
185+ treePoints <- cbind(treePoints, tree2(npred, class.size, nclass))
186+}
187+
188+
189+plot(logisticPoints[1,], logisticPoints[2,], col = "red",xlim = c(0,0.2), ylim = c(0,0.4), xlab = "Error", ylab = "Time", main ="blue trees, red logreg")
190+par(new = TRUE)
191+plot(treePoints[1,], treePoints[2,], col = "blue",xlim = c(0,0.2), ylim = c(0,0.4), xlab = "", ylab ="")
192+par(new = FALSE)
193+
194+```
195+
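One detail worth checking, not changed in this diff: yTrain and yTest are character vectors here, while glmnet(family = "multinomial") documents a factor response and tree() only grows a classification tree when the response is a factor, and since R 4.0 data.frame() no longer converts character columns to factors automatically. A minimal sketch of making the labels explicit factors inside dataSetup(), assuming the variables defined above:

    # hypothetical tweak, not in the proposed diff: keep class labels as factors
    yTrain <<- factor(y[train], levels = class.labels)
    yTest <<- factor(y[test], levels = class.labels)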
196diff --git a/server/database/tables.sql b/server/database/tables.sql
197new file mode 100644
198index 0000000..c137897
199--- /dev/null
200+++ b/server/database/tables.sql
201@@ -0,0 +1,8 @@
202+DROP DATABASE IF EXISTS crash_fixer;
203+CREATE DATABASE crash_fixer;
204+\c crash_fixer;
205+CREATE TABLE applications(id INT PRIMARY KEY NOT NULL, name VARCHAR(250), version VARCHAR(250));
206+CREATE TABLE system_info(id INT PRIMARY KEY, version VARCHAR(250));
207+CREATE TABLE solutions(id INT PRIMARY KEY NOT NULL, details TEXT);
208+CREATE TABLE crash_groups(id INT PRIMARY KEY NOT NULL, solution_id INT REFERENCES solutions(id));
209+CREATE TABLE crash_reports(id INT PRIMARY KEY NOT NULL, url VARCHAR(250), crash_group_id INT REFERENCES crash_groups(id), application_id INT REFERENCES applications(id), system_info_id INT REFERENCES system_info(id), exit_code INT, stderr_output TEXT);
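For reviewers who want to exercise the proposed schema, a purely illustrative sketch in R follows; DBI, RPostgres, and a locally running PostgreSQL server on which tables.sql has already been applied are assumptions, and nothing in this diff says the real server uses R. Note that the ids are plain INT primary keys, so the caller supplies them explicitly:

    library(DBI)
    # insert one application, one solution, one crash group and one crash report
    con <- dbConnect(RPostgres::Postgres(), dbname = "crash_fixer")
    dbExecute(con, "INSERT INTO applications(id, name, version) VALUES (1, 'example-app', '1.0')")
    dbExecute(con, "INSERT INTO solutions(id, details) VALUES (1, 'example fix')")
    dbExecute(con, "INSERT INTO crash_groups(id, solution_id) VALUES (1, 1)")
    dbExecute(con, "INSERT INTO crash_reports(id, url, crash_group_id, application_id, system_info_id, exit_code, stderr_output) VALUES (1, 'http://example.org/report/1', 1, 1, NULL, 139, 'Segmentation fault')")
    dbDisconnect(con)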
