Merge lp:~bigdata-dev/charms/trusty/apache-zeppelin/trunk into lp:charms/trusty/apache-zeppelin
Proposed by: Kevin W Monroe
Status: Merged
Merged at revision: 18
Proposed branch: lp:~bigdata-dev/charms/trusty/apache-zeppelin/trunk
Merge into: lp:charms/trusty/apache-zeppelin
Diff against target: 457 lines (+366/-20), 4 files modified:
  hooks/callbacks.py (+13/-7)
  resources/flume-tutorial/note.json (+337/-0)
  tests/00-setup (+6/-3)
  tests/100-deploy-spark-hdfs-yarn (+10/-10)
To merge this branch: bzr merge lp:~bigdata-dev/charms/trusty/apache-zeppelin/trunk
Related bugs: none
Reviewer: Kevin W Monroe (Approve)
Review via email: mp+271385@code.launchpad.net
Commit message
Description of the change
Revision history for this message
Kevin W Monroe (kwmonroe):
review:
Approve
Preview Diff
1 | === modified file 'hooks/callbacks.py' |
2 | --- hooks/callbacks.py 2015-08-25 02:14:22 +0000 |
3 | +++ hooks/callbacks.py 2015-09-16 21:19:37 +0000 |
4 | @@ -89,23 +89,29 @@ |
5 | # default env). Include our own tutorial, which does work in a |
6 | # spark+hdfs env. Inspiration for this notebook came from here: |
7 | # https://github.com/apache/incubator-zeppelin/pull/46 |
8 | - tutorial_source = Path('resources/hdfs-tutorial') |
9 | - tutorial_source.copytree(self.dist_config.path('zeppelin_notebooks') / 'hdfs-tutorial') |
10 | - |
11 | - # move the tutorial dir included in the tarball to our notebook dir and |
12 | - # symlink that dir under our zeppelin home. we've seen issues where |
13 | - # zepp doesn't honor ZEPPELIN_NOTEBOOK_DIR and instead looks for |
14 | - # notebooks in ZEPPELIN_HOME/notebook. |
15 | notebook_dir = self.dist_config.path('zeppelin_notebooks') |
16 | dist_notebook_dir = self.dist_config.path('zeppelin') / 'notebook' |
17 | dist_tutorial_dir = dist_notebook_dir.dirs()[0] |
18 | dist_tutorial_dir.move(notebook_dir) |
19 | + self.copy_tutorial("hdfs-tutorial") |
20 | + self.copy_tutorial("flume-tutorial") |
21 | dist_notebook_dir.rmtree_p() |
22 | + # move the tutorial dir included in the tarball to our notebook dir and |
23 | + # symlink that dir under our zeppelin home. we've seen issues where |
24 | + # zepp doesn't honor ZEPPELIN_NOTEBOOK_DIR and instead looks for |
25 | + # notebooks in ZEPPELIN_HOME/notebook. |
26 | notebook_dir.symlink(dist_notebook_dir) |
27 | |
28 | # make sure the notebook dir's contents are owned by our user |
29 | cmd = "chown -R ubuntu:hadoop {}".format(notebook_dir) |
30 | call(cmd.split()) |
31 | + |
32 | + |
33 | + def copy_tutorial(self, tutorial_name): |
34 | + tutorial_source = Path('resources/{}'.format(tutorial_name)) |
35 | + tutorial_source.copytree(self.dist_config.path('zeppelin_notebooks') / tutorial_name) |
36 | + |
37 | + |
38 | |
39 | def configure_zeppelin(self): |
40 | ''' |
41 | |
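The callbacks.py change above swaps the single hard-coded hdfs-tutorial copy for a reusable copy_tutorial() helper, copies both bundled tutorials into the notebook directory before the symlink is created, and keeps the workaround for Zeppelin ignoring ZEPPELIN_NOTEBOOK_DIR. A minimal standalone sketch of the resulting flow, assuming the path.py Path API the charm already uses (install_notebooks and its argument are illustrative names, not part of the diff):

from path import Path
from subprocess import call

def copy_tutorial(dist_config, tutorial_name):
    # Tutorials ship under resources/ in the charm and are copied into
    # the configured notebook directory.
    source = Path('resources/{}'.format(tutorial_name))
    source.copytree(dist_config.path('zeppelin_notebooks') / tutorial_name)

def install_notebooks(dist_config):
    notebook_dir = dist_config.path('zeppelin_notebooks')
    dist_notebook_dir = dist_config.path('zeppelin') / 'notebook'

    # Keep the tutorial bundled in the Zeppelin tarball, then add ours.
    dist_notebook_dir.dirs()[0].move(notebook_dir)
    copy_tutorial(dist_config, 'hdfs-tutorial')
    copy_tutorial(dist_config, 'flume-tutorial')

    # Zeppelin has been seen to ignore ZEPPELIN_NOTEBOOK_DIR and read
    # ZEPPELIN_HOME/notebook instead, so replace that dir with a symlink
    # to the real notebook dir.
    dist_notebook_dir.rmtree_p()
    notebook_dir.symlink(dist_notebook_dir)

    # Notebook contents must be owned by the user Zeppelin runs as.
    call('chown -R ubuntu:hadoop {}'.format(notebook_dir).split())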
42 | === added directory 'resources/flume-tutorial' |
43 | === added file 'resources/flume-tutorial/note.json' |
44 | --- resources/flume-tutorial/note.json 1970-01-01 00:00:00 +0000 |
45 | +++ resources/flume-tutorial/note.json 2015-09-16 21:19:37 +0000 |
46 | @@ -0,0 +1,337 @@ |
47 | +{ |
48 | + "paragraphs": [ |
49 | + { |
50 | + "text": "%md\n## Welcome to Realtime Syslog Analytic Tutorial Powered by Juju.\n### In this live tutorial we will demonstrate three main phases of any big data solution:\n#### 1. Data Ingestion - Apache Flume-syslog -\u003e Apache flume-hdfs\n#### 2. Data Processing - Apache Spark+YARN\n#### 3. Data Visualization - SparkSQL", |
51 | + "config": { |
52 | + "colWidth": 12.0, |
53 | + "graph": { |
54 | + "mode": "table", |
55 | + "height": 300.0, |
56 | + "optionOpen": false, |
57 | + "keys": [], |
58 | + "values": [], |
59 | + "groups": [], |
60 | + "scatter": {} |
61 | + } |
62 | + }, |
63 | + "settings": { |
64 | + "params": {}, |
65 | + "forms": {} |
66 | + }, |
67 | + "jobName": "paragraph_1440101679810_1108841391", |
68 | + "id": "20150820-151439_133078543", |
69 | + "result": { |
70 | + "code": "SUCCESS", |
71 | + "type": "HTML", |
72 | + "msg": "\u003ch2\u003eWelcome to Realtime Syslog Analytic Tutorial Powered by Juju.\u003c/h2\u003e\n\u003ch3\u003eIn this live tutorial we will demonstrate three main phases of any big data solution:\u003c/h3\u003e\n\u003ch4\u003e1. Data Ingestion - Apache Flume-syslog -\u003e Apache flume-hdfs\u003c/h4\u003e\n\u003ch4\u003e2. Data Processing - Apache Spark+YARN\u003c/h4\u003e\n\u003ch4\u003e3. Data Visualization - SparkSQL\u003c/h4\u003e\n" |
73 | + }, |
74 | + "dateCreated": "Aug 20, 2015 3:14:39 PM", |
75 | + "dateStarted": "Aug 25, 2015 9:34:23 AM", |
76 | + "dateFinished": "Aug 25, 2015 9:34:23 AM", |
77 | + "status": "FINISHED", |
78 | + "progressUpdateIntervalMs": 500 |
79 | + }, |
80 | + { |
81 | + "title": "Data Ingestion", |
82 | + "text": "import sys.process._\n// Generate syslog messages by running a Spark job\n\"/home/ubuntu/sparkpi.sh\" !!\n// Verify that Flume has collected and sent the syslog messages to HDFS\n\"hadoop fs -ls -R /user/flume/flume-syslog\" !!", |
83 | + "config": { |
84 | + "colWidth": 12.0, |
85 | + "graph": { |
86 | + "mode": "table", |
87 | + "height": 300.0, |
88 | + "optionOpen": false, |
89 | + "keys": [], |
90 | + "values": [], |
91 | + "groups": [], |
92 | + "scatter": {} |
93 | + }, |
94 | + "title": true |
95 | + }, |
96 | + "settings": { |
97 | + "params": {}, |
98 | + "forms": {} |
99 | + }, |
100 | + "jobName": "paragraph_1440112183363_1890510694", |
101 | + "id": "20150820-180943_1527660289", |
102 | + "result": { |
103 | + "code": "SUCCESS", |
104 | + "type": "TEXT", |
105 | + "msg": "" }, |
106 | + "dateCreated": "Aug 20, 2015 6:09:43 PM", |
107 | + "dateStarted": "Aug 24, 2015 10:51:34 PM", |
108 | + "dateFinished": "Aug 24, 2015 10:52:11 PM", |
109 | + "status": "FINISHED", |
110 | + "progressUpdateIntervalMs": 500 |
111 | + }, |
112 | + { |
113 | + "title": "Data Processing in python", |
114 | + "text": "%pyspark\nsc.textFile(\"/user/flume/flume-syslog/*/*/*\").filter(lambda l: \"sshd\" in l).collect()", |
115 | + "config": { |
116 | + "colWidth": 12.0, |
117 | + "graph": { |
118 | + "mode": "table", |
119 | + "height": 300.0, |
120 | + "optionOpen": false, |
121 | + "keys": [], |
122 | + "values": [], |
123 | + "groups": [], |
124 | + "scatter": {} |
125 | + }, |
126 | + "title": true, |
127 | + "tableHide": false, |
128 | + "editorHide": false |
129 | + }, |
130 | + "settings": { |
131 | + "params": {}, |
132 | + "forms": {} |
133 | + }, |
134 | + "jobName": "paragraph_1440112260119_-1393028364", |
135 | + "id": "20150820-181100_389628381", |
136 | + "result": { |
137 | + "code": "SUCCESS", |
138 | + "type": "TEXT", |
139 | + "msg": "" }, |
140 | + "dateCreated": "Aug 20, 2015 6:11:00 PM", |
141 | + "dateStarted": "Aug 24, 2015 10:54:10 PM", |
142 | + "dateFinished": "Aug 24, 2015 10:54:15 PM", |
143 | + "status": "FINISHED", |
144 | + "progressUpdateIntervalMs": 500 |
145 | + }, |
146 | + { |
147 | + "title": "Data Processing In Scala", |
148 | + "text": "import org.joda.time.DateTime\nimport org.joda.time.format.{DateTimeFormatterBuilder, DateTimeFormat}\nimport scala.util.Try\nval reSystemLog \u003d \"\"\"^\\\u003c\\d+\\\u003e([A-Za-z0-9, ]+\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?)\\s+(\\S+)\\s+([^\\[]+)\\[(\\d+)\\]\\s*:?\\s*(.*)\"\"\".r\ncase class SyslogMessage(timestamp: String, host: Option[String], process: String, pid: Int, message: String)\n\nval lines \u003d sc.textFile(\"/user/flume/flume-syslog/*/*/*\")\nval events \u003d lines.flatMap {\n case reSystemLog(timestamp,hostname, proc, pidS, msg) \u003d\u003e\n for {pid \u003c- Try(pidS.toInt).toOption} yield SyslogMessage(timestamp,Some(hostname), proc, pid, msg)\n case _ \u003d\u003e None\n }.toDF()\n\nevents.registerTempTable(\"syslog\")\n", |
149 | + "config": { |
150 | + "colWidth": 12.0, |
151 | + "graph": { |
152 | + "mode": "table", |
153 | + "height": 300.0, |
154 | + "optionOpen": false, |
155 | + "keys": [], |
156 | + "values": [], |
157 | + "groups": [], |
158 | + "scatter": {} |
159 | + }, |
160 | + "title": true, |
161 | + "editorHide": false, |
162 | + "tableHide": false |
163 | + }, |
164 | + "settings": { |
165 | + "params": {}, |
166 | + "forms": {} |
167 | + }, |
168 | + "jobName": "paragraph_1440133397982_798196016", |
169 | + "id": "20150821-000317_766530322", |
170 | + "result": { |
171 | + "code": "SUCCESS", |
172 | + "type": "TEXT", |
173 | + "msg": "import org.joda.time.DateTime\nimport org.joda.time.format.{DateTimeFormatterBuilder, DateTimeFormat}\nimport scala.util.Try\nreSystemLog: scala.util.matching.Regex \u003d ^\\\u003c\\d+\\\u003e([A-Za-z0-9, ]+\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?)\\s+(\\S+)\\s+([^\\[]+)\\[(\\d+)\\]\\s*:?\\s*(.*)\ndefined class SyslogMessage\nlines: org.apache.spark.rdd.RDD[String] \u003d /user/flume/flume-syslog/*/*/* MapPartitionsRDD[509] at textFile at \u003cconsole\u003e:73\nevents: org.apache.spark.sql.DataFrame \u003d [timestamp: string, host: string, process: string, pid: int, message: string]\n" |
174 | + }, |
175 | + "dateCreated": "Aug 21, 2015 12:03:17 AM", |
176 | + "dateStarted": "Aug 24, 2015 10:54:28 PM", |
177 | + "dateFinished": "Aug 24, 2015 10:54:29 PM", |
178 | + "status": "FINISHED", |
179 | + "progressUpdateIntervalMs": 500 |
180 | + }, |
181 | + { |
182 | + "title": "Data Visualization", |
183 | + "text": "%sql \nselect process, count(1) value\nfrom syslog\ngroup by process \norder by process", |
184 | + "config": { |
185 | + "colWidth": 4.0, |
186 | + "graph": { |
187 | + "mode": "pieChart", |
188 | + "height": 300.0, |
189 | + "optionOpen": false, |
190 | + "keys": [ |
191 | + { |
192 | + "name": "process", |
193 | + "index": 0.0, |
194 | + "aggr": "sum" |
195 | + } |
196 | + ], |
197 | + "values": [ |
198 | + { |
199 | + "name": "value", |
200 | + "index": 1.0, |
201 | + "aggr": "sum" |
202 | + } |
203 | + ], |
204 | + "groups": [], |
205 | + "scatter": { |
206 | + "xAxis": { |
207 | + "name": "process", |
208 | + "index": 0.0, |
209 | + "aggr": "sum" |
210 | + }, |
211 | + "yAxis": { |
212 | + "name": "value", |
213 | + "index": 1.0, |
214 | + "aggr": "sum" |
215 | + } |
216 | + } |
217 | + }, |
218 | + "title": true |
219 | + }, |
220 | + "settings": { |
221 | + "params": {}, |
222 | + "forms": {} |
223 | + }, |
224 | + "jobName": "paragraph_1440473498968_444762596", |
225 | + "id": "20150824-223138_1548703563", |
226 | + "result": { |
227 | + "code": "SUCCESS", |
228 | + "type": "TABLE", |
229 | + "msg": "process\tvalue\nCRON\t180\nntpdate\t1\nsshd\t6\nsu\t1\nsystemd-logind\t1\n" |
230 | + }, |
231 | + "dateCreated": "Aug 24, 2015 10:31:38 PM", |
232 | + "dateStarted": "Aug 24, 2015 10:54:37 PM", |
233 | + "dateFinished": "Aug 24, 2015 10:54:41 PM", |
234 | + "status": "FINISHED", |
235 | + "progressUpdateIntervalMs": 500 |
236 | + }, |
237 | + { |
238 | + "title": "Data Visualization", |
239 | + "text": "%sql \nselect pid, count(1) value\nfrom syslog\nwhere pid \u003e 5000 and pid \u003c 20000 and timestamp \u003e ${maxDate\u003d\"Aug 24\"}\ngroup by pid \norder by pid\n", |
240 | + "config": { |
241 | + "colWidth": 4.0, |
242 | + "graph": { |
243 | + "mode": "pieChart", |
244 | + "height": 300.0, |
245 | + "optionOpen": false, |
246 | + "keys": [ |
247 | + { |
248 | + "name": "pid", |
249 | + "index": 0.0, |
250 | + "aggr": "sum" |
251 | + } |
252 | + ], |
253 | + "values": [ |
254 | + { |
255 | + "name": "value", |
256 | + "index": 1.0, |
257 | + "aggr": "sum" |
258 | + } |
259 | + ], |
260 | + "groups": [], |
261 | + "scatter": { |
262 | + "xAxis": { |
263 | + "name": "pid", |
264 | + "index": 0.0, |
265 | + "aggr": "sum" |
266 | + }, |
267 | + "yAxis": { |
268 | + "name": "value", |
269 | + "index": 1.0, |
270 | + "aggr": "sum" |
271 | + } |
272 | + } |
273 | + }, |
274 | + "title": true |
275 | + }, |
276 | + "settings": { |
277 | + "params": {}, |
278 | + "forms": { |
279 | + "maxDate": { |
280 | + "name": "maxDate", |
281 | + "defaultValue": "\"Aug 24\"", |
282 | + "hidden": false |
283 | + } |
284 | + } |
285 | + }, |
286 | + "jobName": "paragraph_1440137477230_886878134", |
287 | + "id": "20150821-011117_310225391", |
288 | + "result": { |
289 | + "code": "SUCCESS", |
290 | + "type": "TABLE", |
291 | + "msg": "pid\tvalue\n5073\t2\n5074\t1\n5218\t2\n5219\t1\n5374\t2\n5375\t1\n5485\t2\n5881\t2\n5882\t1\n" |
292 | + }, |
293 | + "dateCreated": "Aug 21, 2015 1:11:17 AM", |
294 | + "dateStarted": "Aug 24, 2015 10:54:43 PM", |
295 | + "dateFinished": "Aug 24, 2015 10:54:45 PM", |
296 | + "status": "FINISHED", |
297 | + "progressUpdateIntervalMs": 500 |
298 | + }, |
299 | + { |
300 | + "title": "Data Visualization", |
301 | + "text": "%sql \nselect timestamp, count(1) value\nfrom syslog\nwhere timestamp \u003e ${maxDate\u003d\"Aug 24\"} and process \u003d\u003d \"sshd\"\ngroup by timestamp\norder by timestamp", |
302 | + "config": { |
303 | + "colWidth": 4.0, |
304 | + "graph": { |
305 | + "mode": "pieChart", |
306 | + "height": 300.0, |
307 | + "optionOpen": false, |
308 | + "keys": [ |
309 | + { |
310 | + "name": "timestamp", |
311 | + "index": 0.0, |
312 | + "aggr": "sum" |
313 | + } |
314 | + ], |
315 | + "values": [ |
316 | + { |
317 | + "name": "value", |
318 | + "index": 1.0, |
319 | + "aggr": "sum" |
320 | + } |
321 | + ], |
322 | + "groups": [], |
323 | + "scatter": { |
324 | + "xAxis": { |
325 | + "name": "timestamp", |
326 | + "index": 0.0, |
327 | + "aggr": "sum" |
328 | + }, |
329 | + "yAxis": { |
330 | + "name": "value", |
331 | + "index": 1.0, |
332 | + "aggr": "sum" |
333 | + } |
334 | + } |
335 | + }, |
336 | + "title": true |
337 | + }, |
338 | + "settings": { |
339 | + "params": { |
340 | + "maxDate": "\"Aug 20\"" |
341 | + }, |
342 | + "forms": { |
343 | + "maxDate": { |
344 | + "name": "maxDate", |
345 | + "defaultValue": "\"Aug 24\"", |
346 | + "hidden": false |
347 | + } |
348 | + } |
349 | + }, |
350 | + "jobName": "paragraph_1440163786226_421898739", |
351 | + "id": "20150821-082946_601268612", |
352 | + "result": { |
353 | + "code": "SUCCESS", |
354 | + "type": "TABLE", |
355 | + "msg": "timestamp\tvalue\nAug 21 11:20:45\t2\nAug 21 19:58:30\t2\nAug 24 21:59:47\t2\n" |
356 | + }, |
357 | + "dateCreated": "Aug 21, 2015 8:29:46 AM", |
358 | + "dateStarted": "Aug 24, 2015 10:54:54 PM", |
359 | + "dateFinished": "Aug 24, 2015 10:54:55 PM", |
360 | + "status": "FINISHED", |
361 | + "progressUpdateIntervalMs": 500 |
362 | + }, |
363 | + { |
364 | + "config": {}, |
365 | + "settings": { |
366 | + "params": {}, |
367 | + "forms": {} |
368 | + }, |
369 | + "jobName": "paragraph_1440473909272_653880463", |
370 | + "id": "20150824-223829_186145308", |
371 | + "dateCreated": "Aug 24, 2015 10:38:29 PM", |
372 | + "status": "READY", |
373 | + "progressUpdateIntervalMs": 500 |
374 | + } |
375 | + ], |
376 | + "name": "Real-time Analytic Tutorial", |
377 | + "id": "flume-tutorial", |
378 | + "angularObjects": {}, |
379 | + "config": { |
380 | + "looknfeel": "default" |
381 | + }, |
382 | + "info": {} |
383 | +} |
384 | |
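The flume-tutorial notebook added above walks through ingestion, processing, and visualization. Its core is the Scala paragraph that parses raw syslog lines from HDFS with a regular expression into SyslogMessage rows and registers them as a syslog temp table for the later %sql paragraphs. A rough PySpark equivalent of that parsing step, for comparison (the regex and HDFS glob are copied from the notebook; everything else is illustrative and not part of this merge):

import re
from pyspark import SparkContext
from pyspark.sql import SQLContext, Row

RE_SYSLOG = re.compile(
    r'^<\d+>([A-Za-z0-9, ]+\d{2}:\d{2}:\d{2}(?:\.\d{3})?)\s+'
    r'(\S+)\s+([^\[]+)\[(\d+)\]\s*:?\s*(.*)')

def parse(line):
    # Yield a Row per line that matches the syslog pattern; skip the rest.
    m = RE_SYSLOG.match(line)
    if m:
        ts, host, proc, pid, msg = m.groups()
        yield Row(timestamp=ts, host=host, process=proc.strip(),
                  pid=int(pid), message=msg)

sc = SparkContext(appName='syslog-parse')
sqlContext = SQLContext(sc)
events = sqlContext.createDataFrame(
    sc.textFile('/user/flume/flume-syslog/*/*/*').flatMap(parse))
events.registerTempTable('syslog')
# The notebook's first chart is then just:
#   select process, count(1) value from syslog group by process order by process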
385 | === added file 'resources/python/jujuresources-0.2.11.tar.gz' |
386 | Binary files resources/python/jujuresources-0.2.11.tar.gz 1970-01-01 00:00:00 +0000 and resources/python/jujuresources-0.2.11.tar.gz 2015-09-16 21:19:37 +0000 differ |
387 | === removed file 'resources/python/jujuresources-0.2.9.tar.gz' |
388 | Binary files resources/python/jujuresources-0.2.9.tar.gz 2015-06-29 21:07:04 +0000 and resources/python/jujuresources-0.2.9.tar.gz 1970-01-01 00:00:00 +0000 differ |
389 | === modified file 'tests/00-setup' |
390 | --- tests/00-setup 2015-05-05 03:25:30 +0000 |
391 | +++ tests/00-setup 2015-09-16 21:19:37 +0000 |
392 | @@ -1,5 +1,8 @@ |
393 | #!/bin/bash |
394 | |
395 | -sudo add-apt-repository ppa:juju/stable -y |
396 | -sudo apt-get update |
397 | -sudo apt-get install python3 amulet -y |
398 | +if ! dpkg -s amulet &> /dev/null; then |
399 | + echo Installing Amulet... |
400 | + sudo add-apt-repository -y ppa:juju/stable |
401 | + sudo apt-get update |
402 | + sudo apt-get -y install amulet |
403 | +fi |
404 | |
405 | === modified file 'tests/100-deploy-spark-hdfs-yarn' |
406 | --- tests/100-deploy-spark-hdfs-yarn 2015-08-25 02:14:22 +0000 |
407 | +++ tests/100-deploy-spark-hdfs-yarn 2015-09-16 21:19:37 +0000 |
408 | @@ -1,4 +1,4 @@ |
409 | -#!/usr/bin/python3 |
410 | +#!/usr/bin/env python3 |
411 | |
412 | import unittest |
413 | import amulet |
414 | @@ -6,18 +6,18 @@ |
415 | |
416 | class TestDeploy(unittest.TestCase): |
417 | """ |
418 | - Deployment test for Apache Spark using HDFS as shared storage and YARN as |
419 | - cluster job manager. |
420 | + Deployment test for Apache Spark+Zeppelin using HDFS as shared storage |
421 | + and YARN as cluster job manager. |
422 | """ |
423 | |
424 | @classmethod |
425 | def setUpClass(cls): |
426 | cls.d = amulet.Deployment(series='trusty') |
427 | # Deploy a hadoop cluster |
428 | - cls.d.add('yarn-master', charm='cs:~bigdata-dev/trusty/apache-hadoop-yarn-master') |
429 | - cls.d.add('hdfs-master', charm='cs:~bigdata-dev/trusty/apache-hadoop-hdfs-master') |
430 | - cls.d.add('compute-slave', charm='cs:~bigdata-dev/trusty/apache-hadoop-compute-slave', units=3) |
431 | - cls.d.add('plugin', charm='cs:~bigdata-dev/trusty/apache-hadoop-plugin') |
432 | + cls.d.add('yarn-master', charm='cs:trusty/apache-hadoop-yarn-master') |
433 | + cls.d.add('hdfs-master', charm='cs:trusty/apache-hadoop-hdfs-master') |
434 | + cls.d.add('compute-slave', charm='cs:trusty/apache-hadoop-compute-slave', units=3) |
435 | + cls.d.add('plugin', charm='cs:trusty/apache-hadoop-plugin') |
436 | cls.d.relate('yarn-master:namenode', 'hdfs-master:namenode') |
437 | cls.d.relate('compute-slave:nodemanager', 'yarn-master:nodemanager') |
438 | cls.d.relate('compute-slave:datanode', 'hdfs-master:datanode') |
439 | @@ -25,15 +25,15 @@ |
440 | cls.d.relate('plugin:namenode', 'hdfs-master:namenode') |
441 | |
442 | # Add Spark Service |
443 | - cls.d.add('spark', charm='cs:~bigdata-dev/trusty/apache-spark') |
444 | + cls.d.add('spark', charm='cs:trusty/apache-spark') |
445 | cls.d.relate('spark:hadoop-plugin', 'plugin:hadoop-plugin') |
446 | |
447 | # Add Apache Zeppelin |
448 | - cls.d.add('zeppelin', charm='cs:~bigdata-dev/trusty/apache-zeppelin') |
449 | + cls.d.add('zeppelin', charm='cs:trusty/apache-zeppelin') |
450 | cls.d.relate('zeppelin:spark', 'spark:spark') |
451 | |
452 | cls.d.setup(timeout=3600) |
453 | - cls.d.sentry.wait() |
454 | + cls.d.sentry.wait(timeout=3600) |
455 | cls.unit = cls.d.sentry.unit['zeppelin/0'] |
456 | |
457 | ########################################################################### |
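The excerpt ends inside TestDeploy's setUpClass; the rest of the file holds the test methods that drive the zeppelin/0 sentry unit. For readers unfamiliar with amulet, a check in the same style would look like the following (a hypothetical example, not part of this merge; the web UI port of 9090 is an assumption):

    def test_zeppelin_listening(self):
        # UnitSentry.run() executes a command on the unit and returns
        # its output and exit code.
        output, retcode = self.unit.run('netstat -lnt')
        self.assertTrue(':9090' in output,
                        'Zeppelin web UI does not appear to be listening')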