Merge lp:~bigdata-dev/charms/trusty/apache-zeppelin/trunk into lp:charms/trusty/apache-zeppelin

Proposed by Kevin W Monroe
Status: Merged
Merged at revision: 18
Proposed branch: lp:~bigdata-dev/charms/trusty/apache-zeppelin/trunk
Merge into: lp:charms/trusty/apache-zeppelin
Diff against target: 457 lines (+366/-20)
4 files modified
hooks/callbacks.py (+13/-7)
resources/flume-tutorial/note.json (+337/-0)
tests/00-setup (+6/-3)
tests/100-deploy-spark-hdfs-yarn (+10/-10)
To merge this branch: bzr merge lp:~bigdata-dev/charms/trusty/apache-zeppelin/trunk
Reviewer: Kevin W Monroe (status: Approve)
Review via email: mp+271385@code.launchpad.net
Revision history for this message
Kevin W Monroe (kwmonroe):
review: Approve

Preview Diff

1=== modified file 'hooks/callbacks.py'
2--- hooks/callbacks.py 2015-08-25 02:14:22 +0000
3+++ hooks/callbacks.py 2015-09-16 21:19:37 +0000
4@@ -89,23 +89,29 @@
5 # default env). Include our own tutorial, which does work in a
6 # spark+hdfs env. Inspiration for this notebook came from here:
7 # https://github.com/apache/incubator-zeppelin/pull/46
8- tutorial_source = Path('resources/hdfs-tutorial')
9- tutorial_source.copytree(self.dist_config.path('zeppelin_notebooks') / 'hdfs-tutorial')
10-
11- # move the tutorial dir included in the tarball to our notebook dir and
12- # symlink that dir under our zeppelin home. we've seen issues where
13- # zepp doesn't honor ZEPPELIN_NOTEBOOK_DIR and instead looks for
14- # notebooks in ZEPPELIN_HOME/notebook.
15 notebook_dir = self.dist_config.path('zeppelin_notebooks')
16 dist_notebook_dir = self.dist_config.path('zeppelin') / 'notebook'
17 dist_tutorial_dir = dist_notebook_dir.dirs()[0]
18 dist_tutorial_dir.move(notebook_dir)
19+ self.copy_tutorial("hdfs-tutorial")
20+ self.copy_tutorial("flume-tutorial")
21 dist_notebook_dir.rmtree_p()
22+ # move the tutorial dir included in the tarball to our notebook dir and
23+ # symlink that dir under our zeppelin home. we've seen issues where
24+ # zepp doesn't honor ZEPPELIN_NOTEBOOK_DIR and instead looks for
25+ # notebooks in ZEPPELIN_HOME/notebook.
26 notebook_dir.symlink(dist_notebook_dir)
27
28 # make sure the notebook dir's contents are owned by our user
29 cmd = "chown -R ubuntu:hadoop {}".format(notebook_dir)
30 call(cmd.split())
31+
32+
33+ def copy_tutorial(self, tutorial_name):
34+ tutorial_source = Path('resources/{}'.format(tutorial_name))
35+ tutorial_source.copytree(self.dist_config.path('zeppelin_notebooks') / tutorial_name)
36+
37+
38
39 def configure_zeppelin(self):
40 '''
41
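For readers following the callbacks.py hunk: it factors the per-tutorial copy into a copy_tutorial() helper, ships the new flume-tutorial alongside the existing hdfs-tutorial, and keeps the workaround of symlinking the notebook dir under ZEPPELIN_HOME/notebook. Below is a minimal standalone sketch of the resulting flow; it uses the same path.py Path API the charm uses, but notebook_dir and zeppelin_home are stand-ins for the self.dist_config.path() lookups, so treat it as an illustration rather than the charm code itself.

    # Sketch only: notebook_dir and zeppelin_home stand in for the charm's
    # dist_config.path('zeppelin_notebooks') and dist_config.path('zeppelin').
    from subprocess import call
    from path import Path

    def copy_tutorial(notebook_dir, tutorial_name):
        # Copy a bundled tutorial (e.g. resources/flume-tutorial) into the
        # directory Zeppelin is configured to read notebooks from.
        tutorial_source = Path('resources/{}'.format(tutorial_name))
        tutorial_source.copytree(notebook_dir / tutorial_name)

    def setup_notebooks(notebook_dir, zeppelin_home):
        dist_notebook_dir = zeppelin_home / 'notebook'
        # Keep the tutorial shipped in the tarball, then add our own two.
        dist_notebook_dir.dirs()[0].move(notebook_dir)
        copy_tutorial(notebook_dir, 'hdfs-tutorial')
        copy_tutorial(notebook_dir, 'flume-tutorial')
        # Zeppelin has been seen ignoring ZEPPELIN_NOTEBOOK_DIR and reading
        # ZEPPELIN_HOME/notebook instead, so point that path at our dir.
        dist_notebook_dir.rmtree_p()
        notebook_dir.symlink(dist_notebook_dir)
        # Make sure the notebook contents end up owned by the service user.
        call('chown -R ubuntu:hadoop {}'.format(notebook_dir).split())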
42=== added directory 'resources/flume-tutorial'
43=== added file 'resources/flume-tutorial/note.json'
44--- resources/flume-tutorial/note.json 1970-01-01 00:00:00 +0000
45+++ resources/flume-tutorial/note.json 2015-09-16 21:19:37 +0000
46@@ -0,0 +1,337 @@
47+{
48+ "paragraphs": [
49+ {
50+      "text": "%md\n## Welcome to Realtime Syslog Analytic Tutorial Powered by Juju.\n### In this live tutorial we will demonstrate three main phases of any big data solution:\n#### 1. Data Ingestion - Apache Flume-syslog -\u003e Apache flume-hdfs\n#### 2. Data Processing - Apache Spark+YARN\n#### 3. Data Visualization - SparkSQL",
51+ "config": {
52+ "colWidth": 12.0,
53+ "graph": {
54+ "mode": "table",
55+ "height": 300.0,
56+ "optionOpen": false,
57+ "keys": [],
58+ "values": [],
59+ "groups": [],
60+ "scatter": {}
61+ }
62+ },
63+ "settings": {
64+ "params": {},
65+ "forms": {}
66+ },
67+ "jobName": "paragraph_1440101679810_1108841391",
68+ "id": "20150820-151439_133078543",
69+ "result": {
70+ "code": "SUCCESS",
71+ "type": "HTML",
72+        "msg": "\u003ch2\u003eWelcome to Realtime Syslog Analytic Tutorial Powered by Juju.\u003c/h2\u003e\n\u003ch3\u003eIn this live tutorial we will demonstrate three main phases of any big data solution:\u003c/h3\u003e\n\u003ch4\u003e1. Data Ingestion - Apache Flume-syslog -\u003e Apache flume-hdfs\u003c/h4\u003e\n\u003ch4\u003e2. Data Processing - Apache Spark+YARN\u003c/h4\u003e\n\u003ch4\u003e3. Data Visualization - SparkSQL\u003c/h4\u003e\n"
73+ },
74+ "dateCreated": "Aug 20, 2015 3:14:39 PM",
75+ "dateStarted": "Aug 25, 2015 9:34:23 AM",
76+ "dateFinished": "Aug 25, 2015 9:34:23 AM",
77+ "status": "FINISHED",
78+ "progressUpdateIntervalMs": 500
79+ },
80+ {
81+ "title": "Data Ingestion",
82+      "text": "import sys.process._\n// Generate syslog messages by running a spark job\n\"/home/ubuntu/sparkpi.sh\" !!\n// Verify that Flume has collected and sent the syslog messages to HDFS\n\"hadoop fs -ls -R /user/flume/flume-syslog\" !!",
83+ "config": {
84+ "colWidth": 12.0,
85+ "graph": {
86+ "mode": "table",
87+ "height": 300.0,
88+ "optionOpen": false,
89+ "keys": [],
90+ "values": [],
91+ "groups": [],
92+ "scatter": {}
93+ },
94+ "title": true
95+ },
96+ "settings": {
97+ "params": {},
98+ "forms": {}
99+ },
100+ "jobName": "paragraph_1440112183363_1890510694",
101+ "id": "20150820-180943_1527660289",
102+ "result": {
103+ "code": "SUCCESS",
104+ "type": "TEXT",
105+ "msg": "" },
106+ "dateCreated": "Aug 20, 2015 6:09:43 PM",
107+ "dateStarted": "Aug 24, 2015 10:51:34 PM",
108+ "dateFinished": "Aug 24, 2015 10:52:11 PM",
109+ "status": "FINISHED",
110+ "progressUpdateIntervalMs": 500
111+ },
112+ {
113+ "title": "Data Processing in python",
114+ "text": "%pyspark\nsc.textFile(\"/user/flume/flume-syslog/*/*/*\").filter(lambda l: \"sshd\" in l).collect()",
115+ "config": {
116+ "colWidth": 12.0,
117+ "graph": {
118+ "mode": "table",
119+ "height": 300.0,
120+ "optionOpen": false,
121+ "keys": [],
122+ "values": [],
123+ "groups": [],
124+ "scatter": {}
125+ },
126+ "title": true,
127+ "tableHide": false,
128+ "editorHide": false
129+ },
130+ "settings": {
131+ "params": {},
132+ "forms": {}
133+ },
134+ "jobName": "paragraph_1440112260119_-1393028364",
135+ "id": "20150820-181100_389628381",
136+ "result": {
137+ "code": "SUCCESS",
138+ "type": "TEXT",
139+ "msg": "" },
140+ "dateCreated": "Aug 20, 2015 6:11:00 PM",
141+ "dateStarted": "Aug 24, 2015 10:54:10 PM",
142+ "dateFinished": "Aug 24, 2015 10:54:15 PM",
143+ "status": "FINISHED",
144+ "progressUpdateIntervalMs": 500
145+ },
146+ {
147+ "title": "Data Processing In Scala",
148+ "text": "import org.joda.time.DateTime\nimport org.joda.time.format.{DateTimeFormatterBuilder, DateTimeFormat}\nimport scala.util.Try\nval reSystemLog \u003d \"\"\"^\\\u003c\\d+\\\u003e([A-Za-z0-9, ]+\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?)\\s+(\\S+)\\s+([^\\[]+)\\[(\\d+)\\]\\s*:?\\s*(.*)\"\"\".r\ncase class SyslogMessage(timestamp: String, host: Option[String], process: String, pid: Int, message: String)\n\nval lines \u003d sc.textFile(\"/user/flume/flume-syslog/*/*/*\")\nval events \u003d lines.flatMap {\n case reSystemLog(timestamp,hostname, proc, pidS, msg) \u003d\u003e\n for {pid \u003c- Try(pidS.toInt).toOption} yield SyslogMessage(timestamp,Some(hostname), proc, pid, msg)\n case _ \u003d\u003e None\n }.toDF()\n\nevents.registerTempTable(\"syslog\")\n",
149+ "config": {
150+ "colWidth": 12.0,
151+ "graph": {
152+ "mode": "table",
153+ "height": 300.0,
154+ "optionOpen": false,
155+ "keys": [],
156+ "values": [],
157+ "groups": [],
158+ "scatter": {}
159+ },
160+ "title": true,
161+ "editorHide": false,
162+ "tableHide": false
163+ },
164+ "settings": {
165+ "params": {},
166+ "forms": {}
167+ },
168+ "jobName": "paragraph_1440133397982_798196016",
169+ "id": "20150821-000317_766530322",
170+ "result": {
171+ "code": "SUCCESS",
172+ "type": "TEXT",
173+ "msg": "import org.joda.time.DateTime\nimport org.joda.time.format.{DateTimeFormatterBuilder, DateTimeFormat}\nimport scala.util.Try\nreSystemLog: scala.util.matching.Regex \u003d ^\\\u003c\\d+\\\u003e([A-Za-z0-9, ]+\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?)\\s+(\\S+)\\s+([^\\[]+)\\[(\\d+)\\]\\s*:?\\s*(.*)\ndefined class SyslogMessage\nlines: org.apache.spark.rdd.RDD[String] \u003d /user/flume/flume-syslog/*/*/* MapPartitionsRDD[509] at textFile at \u003cconsole\u003e:73\nevents: org.apache.spark.sql.DataFrame \u003d [timestamp: string, host: string, process: string, pid: int, message: string]\n"
174+ },
175+ "dateCreated": "Aug 21, 2015 12:03:17 AM",
176+ "dateStarted": "Aug 24, 2015 10:54:28 PM",
177+ "dateFinished": "Aug 24, 2015 10:54:29 PM",
178+ "status": "FINISHED",
179+ "progressUpdateIntervalMs": 500
180+ },
181+ {
182+ "title": "Data Visualization",
183+ "text": "%sql \nselect process, count(1) value\nfrom syslog\ngroup by process \norder by process",
184+ "config": {
185+ "colWidth": 4.0,
186+ "graph": {
187+ "mode": "pieChart",
188+ "height": 300.0,
189+ "optionOpen": false,
190+ "keys": [
191+ {
192+ "name": "process",
193+ "index": 0.0,
194+ "aggr": "sum"
195+ }
196+ ],
197+ "values": [
198+ {
199+ "name": "value",
200+ "index": 1.0,
201+ "aggr": "sum"
202+ }
203+ ],
204+ "groups": [],
205+ "scatter": {
206+ "xAxis": {
207+ "name": "process",
208+ "index": 0.0,
209+ "aggr": "sum"
210+ },
211+ "yAxis": {
212+ "name": "value",
213+ "index": 1.0,
214+ "aggr": "sum"
215+ }
216+ }
217+ },
218+ "title": true
219+ },
220+ "settings": {
221+ "params": {},
222+ "forms": {}
223+ },
224+ "jobName": "paragraph_1440473498968_444762596",
225+ "id": "20150824-223138_1548703563",
226+ "result": {
227+ "code": "SUCCESS",
228+ "type": "TABLE",
229+ "msg": "process\tvalue\nCRON\t180\nntpdate\t1\nsshd\t6\nsu\t1\nsystemd-logind\t1\n"
230+ },
231+ "dateCreated": "Aug 24, 2015 10:31:38 PM",
232+ "dateStarted": "Aug 24, 2015 10:54:37 PM",
233+ "dateFinished": "Aug 24, 2015 10:54:41 PM",
234+ "status": "FINISHED",
235+ "progressUpdateIntervalMs": 500
236+ },
237+ {
238+ "title": "Data Visualization",
239+ "text": "%sql \nselect pid, count(1) value\nfrom syslog\nwhere pid \u003e 5000 and pid \u003c 20000 and timestamp \u003e ${maxDate\u003d\"Aug 24\"}\ngroup by pid \norder by pid\n",
240+ "config": {
241+ "colWidth": 4.0,
242+ "graph": {
243+ "mode": "pieChart",
244+ "height": 300.0,
245+ "optionOpen": false,
246+ "keys": [
247+ {
248+ "name": "pid",
249+ "index": 0.0,
250+ "aggr": "sum"
251+ }
252+ ],
253+ "values": [
254+ {
255+ "name": "value",
256+ "index": 1.0,
257+ "aggr": "sum"
258+ }
259+ ],
260+ "groups": [],
261+ "scatter": {
262+ "xAxis": {
263+ "name": "pid",
264+ "index": 0.0,
265+ "aggr": "sum"
266+ },
267+ "yAxis": {
268+ "name": "value",
269+ "index": 1.0,
270+ "aggr": "sum"
271+ }
272+ }
273+ },
274+ "title": true
275+ },
276+ "settings": {
277+ "params": {},
278+ "forms": {
279+ "maxDate": {
280+ "name": "maxDate",
281+ "defaultValue": "\"Aug 24\"",
282+ "hidden": false
283+ }
284+ }
285+ },
286+ "jobName": "paragraph_1440137477230_886878134",
287+ "id": "20150821-011117_310225391",
288+ "result": {
289+ "code": "SUCCESS",
290+ "type": "TABLE",
291+ "msg": "pid\tvalue\n5073\t2\n5074\t1\n5218\t2\n5219\t1\n5374\t2\n5375\t1\n5485\t2\n5881\t2\n5882\t1\n"
292+ },
293+ "dateCreated": "Aug 21, 2015 1:11:17 AM",
294+ "dateStarted": "Aug 24, 2015 10:54:43 PM",
295+ "dateFinished": "Aug 24, 2015 10:54:45 PM",
296+ "status": "FINISHED",
297+ "progressUpdateIntervalMs": 500
298+ },
299+ {
300+ "title": "Data Visualization",
301+ "text": "%sql \nselect timestamp, count(1) value\nfrom syslog\nwhere timestamp \u003e ${maxDate\u003d\"Aug 24\"} and process \u003d\u003d \"sshd\"\ngroup by timestamp\norder by timestamp",
302+ "config": {
303+ "colWidth": 4.0,
304+ "graph": {
305+ "mode": "pieChart",
306+ "height": 300.0,
307+ "optionOpen": false,
308+ "keys": [
309+ {
310+ "name": "timestamp",
311+ "index": 0.0,
312+ "aggr": "sum"
313+ }
314+ ],
315+ "values": [
316+ {
317+ "name": "value",
318+ "index": 1.0,
319+ "aggr": "sum"
320+ }
321+ ],
322+ "groups": [],
323+ "scatter": {
324+ "xAxis": {
325+ "name": "timestamp",
326+ "index": 0.0,
327+ "aggr": "sum"
328+ },
329+ "yAxis": {
330+ "name": "value",
331+ "index": 1.0,
332+ "aggr": "sum"
333+ }
334+ }
335+ },
336+ "title": true
337+ },
338+ "settings": {
339+ "params": {
340+ "maxDate": "\"Aug 20\""
341+ },
342+ "forms": {
343+ "maxDate": {
344+ "name": "maxDate",
345+ "defaultValue": "\"Aug 24\"",
346+ "hidden": false
347+ }
348+ }
349+ },
350+ "jobName": "paragraph_1440163786226_421898739",
351+ "id": "20150821-082946_601268612",
352+ "result": {
353+ "code": "SUCCESS",
354+ "type": "TABLE",
355+ "msg": "timestamp\tvalue\nAug 21 11:20:45\t2\nAug 21 19:58:30\t2\nAug 24 21:59:47\t2\n"
356+ },
357+ "dateCreated": "Aug 21, 2015 8:29:46 AM",
358+ "dateStarted": "Aug 24, 2015 10:54:54 PM",
359+ "dateFinished": "Aug 24, 2015 10:54:55 PM",
360+ "status": "FINISHED",
361+ "progressUpdateIntervalMs": 500
362+ },
363+ {
364+ "config": {},
365+ "settings": {
366+ "params": {},
367+ "forms": {}
368+ },
369+ "jobName": "paragraph_1440473909272_653880463",
370+ "id": "20150824-223829_186145308",
371+ "dateCreated": "Aug 24, 2015 10:38:29 PM",
372+ "status": "READY",
373+ "progressUpdateIntervalMs": 500
374+ }
375+ ],
376+ "name": "Real-time Analytic Tutorial",
377+ "id": "flume-tutorial",
378+ "angularObjects": {},
379+ "config": {
380+ "looknfeel": "default"
381+ },
382+ "info": {}
383+}
384
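The flume-tutorial notebook added above stores its paragraphs as JSON-escaped strings: a markdown intro, a Scala paragraph that triggers syslog traffic and lists the Flume output in HDFS, a %pyspark filter, a Scala parser that registers a syslog temp table, and three %sql visualizations driven by that table. As a readable companion, here is a minimal standalone sketch of the processing and query steps under the same /user/flume/flume-syslog layout; inside Zeppelin the SparkContext and SQLContext are already provided, the app name below is a placeholder, and the parse is deliberately much simpler than the notebook's Scala regex.

    # Rough standalone companion to the notebook's %pyspark and %sql steps.
    import re
    from pyspark import SparkConf, SparkContext
    from pyspark.sql import SQLContext, Row

    conf = SparkConf().setAppName('flume-syslog-sketch')   # placeholder name
    sc = SparkContext(conf=conf)
    sqlContext = SQLContext(sc)

    # The notebook reads whatever Flume has written under this wildcarded path.
    lines = sc.textFile('/user/flume/flume-syslog/*/*/*')

    # Same quick check as the notebook's %pyspark paragraph.
    print(lines.filter(lambda l: 'sshd' in l).count())

    # Grab the "process[pid]:" token if present, just enough to mimic the
    # %sql summary; the notebook's Scala regex extracts more fields.
    proc_re = re.compile(r'(\S+?)\[(\d+)\]:')

    def to_row(line):
        m = proc_re.search(line)
        return Row(process=m.group(1), pid=int(m.group(2))) if m else None

    events = sqlContext.createDataFrame(
        lines.map(to_row).filter(lambda r: r is not None))
    events.registerTempTable('syslog')
    sqlContext.sql('select process, count(1) value from syslog '
                   'group by process order by process').show()

    sc.stop()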
385=== added file 'resources/python/jujuresources-0.2.11.tar.gz'
386Binary files resources/python/jujuresources-0.2.11.tar.gz 1970-01-01 00:00:00 +0000 and resources/python/jujuresources-0.2.11.tar.gz 2015-09-16 21:19:37 +0000 differ
387=== removed file 'resources/python/jujuresources-0.2.9.tar.gz'
388Binary files resources/python/jujuresources-0.2.9.tar.gz 2015-06-29 21:07:04 +0000 and resources/python/jujuresources-0.2.9.tar.gz 1970-01-01 00:00:00 +0000 differ
389=== modified file 'tests/00-setup'
390--- tests/00-setup 2015-05-05 03:25:30 +0000
391+++ tests/00-setup 2015-09-16 21:19:37 +0000
392@@ -1,5 +1,8 @@
393 #!/bin/bash
394
395-sudo add-apt-repository ppa:juju/stable -y
396-sudo apt-get update
397-sudo apt-get install python3 amulet -y
398+if ! dpkg -s amulet &> /dev/null; then
399+ echo Installing Amulet...
400+ sudo add-apt-repository -y ppa:juju/stable
401+ sudo apt-get update
402+ sudo apt-get -y install amulet
403+fi
404
405=== modified file 'tests/100-deploy-spark-hdfs-yarn'
406--- tests/100-deploy-spark-hdfs-yarn 2015-08-25 02:14:22 +0000
407+++ tests/100-deploy-spark-hdfs-yarn 2015-09-16 21:19:37 +0000
408@@ -1,4 +1,4 @@
409-#!/usr/bin/python3
410+#!/usr/bin/env python3
411
412 import unittest
413 import amulet
414@@ -6,18 +6,18 @@
415
416 class TestDeploy(unittest.TestCase):
417 """
418- Deployment test for Apache Spark using HDFS as shared storage and YARN as
419- cluster job manager.
420+ Deployment test for Apache Spark+Zeppelin using HDFS as shared storage
421+ and YARN as cluster job manager.
422 """
423
424 @classmethod
425 def setUpClass(cls):
426 cls.d = amulet.Deployment(series='trusty')
427 # Deploy a hadoop cluster
428- cls.d.add('yarn-master', charm='cs:~bigdata-dev/trusty/apache-hadoop-yarn-master')
429- cls.d.add('hdfs-master', charm='cs:~bigdata-dev/trusty/apache-hadoop-hdfs-master')
430- cls.d.add('compute-slave', charm='cs:~bigdata-dev/trusty/apache-hadoop-compute-slave', units=3)
431- cls.d.add('plugin', charm='cs:~bigdata-dev/trusty/apache-hadoop-plugin')
432+ cls.d.add('yarn-master', charm='cs:trusty/apache-hadoop-yarn-master')
433+ cls.d.add('hdfs-master', charm='cs:trusty/apache-hadoop-hdfs-master')
434+ cls.d.add('compute-slave', charm='cs:trusty/apache-hadoop-compute-slave', units=3)
435+ cls.d.add('plugin', charm='cs:trusty/apache-hadoop-plugin')
436 cls.d.relate('yarn-master:namenode', 'hdfs-master:namenode')
437 cls.d.relate('compute-slave:nodemanager', 'yarn-master:nodemanager')
438 cls.d.relate('compute-slave:datanode', 'hdfs-master:datanode')
439@@ -25,15 +25,15 @@
440 cls.d.relate('plugin:namenode', 'hdfs-master:namenode')
441
442 # Add Spark Service
443- cls.d.add('spark', charm='cs:~bigdata-dev/trusty/apache-spark')
444+ cls.d.add('spark', charm='cs:trusty/apache-spark')
445 cls.d.relate('spark:hadoop-plugin', 'plugin:hadoop-plugin')
446
447 # Add Apache Zeppelin
448- cls.d.add('zeppelin', charm='cs:~bigdata-dev/trusty/apache-zeppelin')
449+ cls.d.add('zeppelin', charm='cs:trusty/apache-zeppelin')
450 cls.d.relate('zeppelin:spark', 'spark:spark')
451
452 cls.d.setup(timeout=3600)
453- cls.d.sentry.wait()
454+ cls.d.sentry.wait(timeout=3600)
455 cls.unit = cls.d.sentry.unit['zeppelin/0']
456
457 ###########################################################################
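The test changes above switch the deployment to the promoted cs:trusty charms, use /usr/bin/env to locate python3, and give sentry.wait() the same explicit 3600-second timeout as setup(). For a quicker check against a unit that is already deployed, here is a small standalone sketch; the ZEPPELIN_URL variable and the default port are illustrative assumptions, not values asserted by this branch.

    #!/usr/bin/env python3
    # Standalone smoke check: confirm the Zeppelin web UI of an existing
    # deployment answers at all. Point ZEPPELIN_URL at the unit's address.
    import os
    import unittest
    import urllib.request


    class TestZeppelinWebUI(unittest.TestCase):

        def test_ui_responds(self):
            # The default port here is an assumption; override via ZEPPELIN_URL.
            url = os.environ.get('ZEPPELIN_URL', 'http://localhost:9090')
            with urllib.request.urlopen(url, timeout=30) as resp:
                self.assertEqual(resp.getcode(), 200)


    if __name__ == '__main__':
        unittest.main()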
