Merge lp:~bigdata-dev/charms/trusty/apache-zeppelin/trunk into lp:charms/trusty/apache-zeppelin
Proposed by: Kevin W Monroe
Status: Merged
Merged at revision: 18
Proposed branch: lp:~bigdata-dev/charms/trusty/apache-zeppelin/trunk
Merge into: lp:charms/trusty/apache-zeppelin
Diff against target: 457 lines (+366/-20), 4 files modified:
  hooks/callbacks.py (+13/-7)
  resources/flume-tutorial/note.json (+337/-0)
  tests/00-setup (+6/-3)
  tests/100-deploy-spark-hdfs-yarn (+10/-10)
To merge this branch: bzr merge lp:~bigdata-dev/charms/trusty/apache-zeppelin/trunk
Related bugs: none
Reviewer: Kevin W Monroe (Approve)
Review via email: mp+271385@code.launchpad.net
Commit message
Description of the change
Revision history for this message
Kevin W Monroe (kwmonroe):
review:
Approve
Preview Diff
1 | === modified file 'hooks/callbacks.py' |
2 | --- hooks/callbacks.py 2015-08-25 02:14:22 +0000 |
3 | +++ hooks/callbacks.py 2015-09-16 21:19:37 +0000 |
4 | @@ -89,23 +89,29 @@ |
5 | # default env). Include our own tutorial, which does work in a |
6 | # spark+hdfs env. Inspiration for this notebook came from here: |
7 | # https://github.com/apache/incubator-zeppelin/pull/46 |
8 | - tutorial_source = Path('resources/hdfs-tutorial') |
9 | - tutorial_source.copytree(self.dist_config.path('zeppelin_notebooks') / 'hdfs-tutorial') |
10 | - |
11 | - # move the tutorial dir included in the tarball to our notebook dir and |
12 | - # symlink that dir under our zeppelin home. we've seen issues where |
13 | - # zepp doesn't honor ZEPPELIN_NOTEBOOK_DIR and instead looks for |
14 | - # notebooks in ZEPPELIN_HOME/notebook. |
15 | notebook_dir = self.dist_config.path('zeppelin_notebooks') |
16 | dist_notebook_dir = self.dist_config.path('zeppelin') / 'notebook' |
17 | dist_tutorial_dir = dist_notebook_dir.dirs()[0] |
18 | dist_tutorial_dir.move(notebook_dir) |
19 | + self.copy_tutorial("hdfs-tutorial") |
20 | + self.copy_tutorial("flume-tutorial") |
21 | dist_notebook_dir.rmtree_p() |
22 | + # move the tutorial dir included in the tarball to our notebook dir and |
23 | + # symlink that dir under our zeppelin home. we've seen issues where |
24 | + # zepp doesn't honor ZEPPELIN_NOTEBOOK_DIR and instead looks for |
25 | + # notebooks in ZEPPELIN_HOME/notebook. |
26 | notebook_dir.symlink(dist_notebook_dir) |
27 | |
28 | # make sure the notebook dir's contents are owned by our user |
29 | cmd = "chown -R ubuntu:hadoop {}".format(notebook_dir) |
30 | call(cmd.split()) |
31 | + |
32 | + |
33 | + def copy_tutorial(self, tutorial_name): |
34 | + tutorial_source = Path('resources/{}'.format(tutorial_name)) |
35 | + tutorial_source.copytree(self.dist_config.path('zeppelin_notebooks') / tutorial_name) |
36 | + |
37 | + |
38 | |
39 | def configure_zeppelin(self): |
40 | ''' |
41 | |
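The callbacks.py change above swaps the single hard-coded hdfs-tutorial copy for a reusable copy_tutorial() helper, copies both bundled tutorials into the notebook directory before the symlink is created, and keeps the workaround for Zeppelin ignoring ZEPPELIN_NOTEBOOK_DIR. A minimal standalone sketch of the resulting flow, assuming the path.py Path API the charm already uses (install_notebooks and its argument are illustrative names, not part of the diff):

from path import Path
from subprocess import call

def copy_tutorial(dist_config, tutorial_name):
    # Tutorials ship under resources/ in the charm and are copied into
    # the configured notebook directory.
    source = Path('resources/{}'.format(tutorial_name))
    source.copytree(dist_config.path('zeppelin_notebooks') / tutorial_name)

def install_notebooks(dist_config):
    notebook_dir = dist_config.path('zeppelin_notebooks')
    dist_notebook_dir = dist_config.path('zeppelin') / 'notebook'

    # Keep the tutorial bundled in the Zeppelin tarball, then add ours.
    dist_notebook_dir.dirs()[0].move(notebook_dir)
    copy_tutorial(dist_config, 'hdfs-tutorial')
    copy_tutorial(dist_config, 'flume-tutorial')

    # Zeppelin has been seen to ignore ZEPPELIN_NOTEBOOK_DIR and read
    # ZEPPELIN_HOME/notebook instead, so replace that dir with a symlink
    # to the real notebook dir.
    dist_notebook_dir.rmtree_p()
    notebook_dir.symlink(dist_notebook_dir)

    # Notebook contents must be owned by the user Zeppelin runs as.
    call('chown -R ubuntu:hadoop {}'.format(notebook_dir).split())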
42 | === added directory 'resources/flume-tutorial' |
43 | === added file 'resources/flume-tutorial/note.json' |
44 | --- resources/flume-tutorial/note.json 1970-01-01 00:00:00 +0000 |
45 | +++ resources/flume-tutorial/note.json 2015-09-16 21:19:37 +0000 |
46 | @@ -0,0 +1,337 @@ |
47 | +{ |
48 | + "paragraphs": [ |
49 | + { |
50 | + "text": "%md\n## Welcome to Realtime Syslog Analytic Tutorial Powered by Juju.\n### In this live tutorial we will demonstrate three main phases of any big data solution:\n#### 1. Data Ingestion - Apache Flume-syslog -\u003e Apache flume-hdfs\n#### 2. Data Processing - Apache Spark+YARN\n#### 3. Data Visualization - SparkSQL", |
51 | + "config": { |
52 | + "colWidth": 12.0, |
53 | + "graph": { |
54 | + "mode": "table", |
55 | + "height": 300.0, |
56 | + "optionOpen": false, |
57 | + "keys": [], |
58 | + "values": [], |
59 | + "groups": [], |
60 | + "scatter": {} |
61 | + } |
62 | + }, |
63 | + "settings": { |
64 | + "params": {}, |
65 | + "forms": {} |
66 | + }, |
67 | + "jobName": "paragraph_1440101679810_1108841391", |
68 | + "id": "20150820-151439_133078543", |
69 | + "result": { |
70 | + "code": "SUCCESS", |
71 | + "type": "HTML", |
72 | + "msg": "\u003ch2\u003eWelcome to Realtime Syslog Analytic Tutorial Powered by Juju.\u003c/h2\u003e\n\u003ch3\u003eIn this live tutorial we will demonstrate three main phases of any big data solution:\u003c/h3\u003e\n\u003ch4\u003e1. Data Ingestion - Apache Flume-syslog -\u003e Apache flume-hdfs\u003c/h4\u003e\n\u003ch4\u003e2. Data Processing - Apache Spark+YARN\u003c/h4\u003e\n\u003ch4\u003e3. Data Visualization - SparkSQL\u003c/h4\u003e\n" |
73 | + }, |
74 | + "dateCreated": "Aug 20, 2015 3:14:39 PM", |
75 | + "dateStarted": "Aug 25, 2015 9:34:23 AM", |
76 | + "dateFinished": "Aug 25, 2015 9:34:23 AM", |
77 | + "status": "FINISHED", |
78 | + "progressUpdateIntervalMs": 500 |
79 | + }, |
80 | + { |
81 | + "title": "Data Ingestion", |
82 | + "text": "import sys.process._\n// Generate syslog messages by running a Spark job\n\"/home/ubuntu/sparkpi.sh\" !!\n// Verify that Flume has collected and sent the syslog messages to HDFS\n\"hadoop fs -ls -R /user/flume/flume-syslog\" !!", |
83 | + "config": { |
84 | + "colWidth": 12.0, |
85 | + "graph": { |
86 | + "mode": "table", |
87 | + "height": 300.0, |
88 | + "optionOpen": false, |
89 | + "keys": [], |
90 | + "values": [], |
91 | + "groups": [], |
92 | + "scatter": {} |
93 | + }, |
94 | + "title": true |
95 | + }, |
96 | + "settings": { |
97 | + "params": {}, |
98 | + "forms": {} |
99 | + }, |
100 | + "jobName": "paragraph_1440112183363_1890510694", |
101 | + "id": "20150820-180943_1527660289", |
102 | + "result": { |
103 | + "code": "SUCCESS", |
104 | + "type": "TEXT", |
105 | + "msg": "" }, |
106 | + "dateCreated": "Aug 20, 2015 6:09:43 PM", |
107 | + "dateStarted": "Aug 24, 2015 10:51:34 PM", |
108 | + "dateFinished": "Aug 24, 2015 10:52:11 PM", |
109 | + "status": "FINISHED", |
110 | + "progressUpdateIntervalMs": 500 |
111 | + }, |
112 | + { |
113 | + "title": "Data Processing in python", |
114 | + "text": "%pyspark\nsc.textFile(\"/user/flume/flume-syslog/*/*/*\").filter(lambda l: \"sshd\" in l).collect()", |
115 | + "config": { |
116 | + "colWidth": 12.0, |
117 | + "graph": { |
118 | + "mode": "table", |
119 | + "height": 300.0, |
120 | + "optionOpen": false, |
121 | + "keys": [], |
122 | + "values": [], |
123 | + "groups": [], |
124 | + "scatter": {} |
125 | + }, |
126 | + "title": true, |
127 | + "tableHide": false, |
128 | + "editorHide": false |
129 | + }, |
130 | + "settings": { |
131 | + "params": {}, |
132 | + "forms": {} |
133 | + }, |
134 | + "jobName": "paragraph_1440112260119_-1393028364", |
135 | + "id": "20150820-181100_389628381", |
136 | + "result": { |
137 | + "code": "SUCCESS", |
138 | + "type": "TEXT", |
139 | + "msg": "" }, |
140 | + "dateCreated": "Aug 20, 2015 6:11:00 PM", |
141 | + "dateStarted": "Aug 24, 2015 10:54:10 PM", |
142 | + "dateFinished": "Aug 24, 2015 10:54:15 PM", |
143 | + "status": "FINISHED", |
144 | + "progressUpdateIntervalMs": 500 |
145 | + }, |
146 | + { |
147 | + "title": "Data Processing In Scala", |
148 | + "text": "import org.joda.time.DateTime\nimport org.joda.time.format.{DateTimeFormatterBuilder, DateTimeFormat}\nimport scala.util.Try\nval reSystemLog \u003d \"\"\"^\\\u003c\\d+\\\u003e([A-Za-z0-9, ]+\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?)\\s+(\\S+)\\s+([^\\[]+)\\[(\\d+)\\]\\s*:?\\s*(.*)\"\"\".r\ncase class SyslogMessage(timestamp: String, host: Option[String], process: String, pid: Int, message: String)\n\nval lines \u003d sc.textFile(\"/user/flume/flume-syslog/*/*/*\")\nval events \u003d lines.flatMap {\n case reSystemLog(timestamp,hostname, proc, pidS, msg) \u003d\u003e\n for {pid \u003c- Try(pidS.toInt).toOption} yield SyslogMessage(timestamp,Some(hostname), proc, pid, msg)\n case _ \u003d\u003e None\n }.toDF()\n\nevents.registerTempTable(\"syslog\")\n", |
149 | + "config": { |
150 | + "colWidth": 12.0, |
151 | + "graph": { |
152 | + "mode": "table", |
153 | + "height": 300.0, |
154 | + "optionOpen": false, |
155 | + "keys": [], |
156 | + "values": [], |
157 | + "groups": [], |
158 | + "scatter": {} |
159 | + }, |
160 | + "title": true, |
161 | + "editorHide": false, |
162 | + "tableHide": false |
163 | + }, |
164 | + "settings": { |
165 | + "params": {}, |
166 | + "forms": {} |
167 | + }, |
168 | + "jobName": "paragraph_1440133397982_798196016", |
169 | + "id": "20150821-000317_766530322", |
170 | + "result": { |
171 | + "code": "SUCCESS", |
172 | + "type": "TEXT", |
173 | + "msg": "import org.joda.time.DateTime\nimport org.joda.time.format.{DateTimeFormatterBuilder, DateTimeFormat}\nimport scala.util.Try\nreSystemLog: scala.util.matching.Regex \u003d ^\\\u003c\\d+\\\u003e([A-Za-z0-9, ]+\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?)\\s+(\\S+)\\s+([^\\[]+)\\[(\\d+)\\]\\s*:?\\s*(.*)\ndefined class SyslogMessage\nlines: org.apache.spark.rdd.RDD[String] \u003d /user/flume/flume-syslog/*/*/* MapPartitionsRDD[509] at textFile at \u003cconsole\u003e:73\nevents: org.apache.spark.sql.DataFrame \u003d [timestamp: string, host: string, process: string, pid: int, message: string]\n" |
174 | + }, |
175 | + "dateCreated": "Aug 21, 2015 12:03:17 AM", |
176 | + "dateStarted": "Aug 24, 2015 10:54:28 PM", |
177 | + "dateFinished": "Aug 24, 2015 10:54:29 PM", |
178 | + "status": "FINISHED", |
179 | + "progressUpdateIntervalMs": 500 |
180 | + }, |
181 | + { |
182 | + "title": "Data Visualization", |
183 | + "text": "%sql \nselect process, count(1) value\nfrom syslog\ngroup by process \norder by process", |
184 | + "config": { |
185 | + "colWidth": 4.0, |
186 | + "graph": { |
187 | + "mode": "pieChart", |
188 | + "height": 300.0, |
189 | + "optionOpen": false, |
190 | + "keys": [ |
191 | + { |
192 | + "name": "process", |
193 | + "index": 0.0, |
194 | + "aggr": "sum" |
195 | + } |
196 | + ], |
197 | + "values": [ |
198 | + { |
199 | + "name": "value", |
200 | + "index": 1.0, |
201 | + "aggr": "sum" |
202 | + } |
203 | + ], |
204 | + "groups": [], |
205 | + "scatter": { |
206 | + "xAxis": { |
207 | + "name": "process", |
208 | + "index": 0.0, |
209 | + "aggr": "sum" |
210 | + }, |
211 | + "yAxis": { |
212 | + "name": "value", |
213 | + "index": 1.0, |
214 | + "aggr": "sum" |
215 | + } |
216 | + } |
217 | + }, |
218 | + "title": true |
219 | + }, |
220 | + "settings": { |
221 | + "params": {}, |
222 | + "forms": {} |
223 | + }, |
224 | + "jobName": "paragraph_1440473498968_444762596", |
225 | + "id": "20150824-223138_1548703563", |
226 | + "result": { |
227 | + "code": "SUCCESS", |
228 | + "type": "TABLE", |
229 | + "msg": "process\tvalue\nCRON\t180\nntpdate\t1\nsshd\t6\nsu\t1\nsystemd-logind\t1\n" |
230 | + }, |
231 | + "dateCreated": "Aug 24, 2015 10:31:38 PM", |
232 | + "dateStarted": "Aug 24, 2015 10:54:37 PM", |
233 | + "dateFinished": "Aug 24, 2015 10:54:41 PM", |
234 | + "status": "FINISHED", |
235 | + "progressUpdateIntervalMs": 500 |
236 | + }, |
237 | + { |
238 | + "title": "Data Visualization", |
239 | + "text": "%sql \nselect pid, count(1) value\nfrom syslog\nwhere pid \u003e 5000 and pid \u003c 20000 and timestamp \u003e ${maxDate\u003d\"Aug 24\"}\ngroup by pid \norder by pid\n", |
240 | + "config": { |
241 | + "colWidth": 4.0, |
242 | + "graph": { |
243 | + "mode": "pieChart", |
244 | + "height": 300.0, |
245 | + "optionOpen": false, |
246 | + "keys": [ |
247 | + { |
248 | + "name": "pid", |
249 | + "index": 0.0, |
250 | + "aggr": "sum" |
251 | + } |
252 | + ], |
253 | + "values": [ |
254 | + { |
255 | + "name": "value", |
256 | + "index": 1.0, |
257 | + "aggr": "sum" |
258 | + } |
259 | + ], |
260 | + "groups": [], |
261 | + "scatter": { |
262 | + "xAxis": { |
263 | + "name": "pid", |
264 | + "index": 0.0, |
265 | + "aggr": "sum" |
266 | + }, |
267 | + "yAxis": { |
268 | + "name": "value", |
269 | + "index": 1.0, |
270 | + "aggr": "sum" |
271 | + } |
272 | + } |
273 | + }, |
274 | + "title": true |
275 | + }, |
276 | + "settings": { |
277 | + "params": {}, |
278 | + "forms": { |
279 | + "maxDate": { |
280 | + "name": "maxDate", |
281 | + "defaultValue": "\"Aug 24\"", |
282 | + "hidden": false |
283 | + } |
284 | + } |
285 | + }, |
286 | + "jobName": "paragraph_1440137477230_886878134", |
287 | + "id": "20150821-011117_310225391", |
288 | + "result": { |
289 | + "code": "SUCCESS", |
290 | + "type": "TABLE", |
291 | + "msg": "pid\tvalue\n5073\t2\n5074\t1\n5218\t2\n5219\t1\n5374\t2\n5375\t1\n5485\t2\n5881\t2\n5882\t1\n" |
292 | + }, |
293 | + "dateCreated": "Aug 21, 2015 1:11:17 AM", |
294 | + "dateStarted": "Aug 24, 2015 10:54:43 PM", |
295 | + "dateFinished": "Aug 24, 2015 10:54:45 PM", |
296 | + "status": "FINISHED", |
297 | + "progressUpdateIntervalMs": 500 |
298 | + }, |
299 | + { |
300 | + "title": "Data Visualization", |
301 | + "text": "%sql \nselect timestamp, count(1) value\nfrom syslog\nwhere timestamp \u003e ${maxDate\u003d\"Aug 24\"} and process \u003d\u003d \"sshd\"\ngroup by timestamp\norder by timestamp", |
302 | + "config": { |
303 | + "colWidth": 4.0, |
304 | + "graph": { |
305 | + "mode": "pieChart", |
306 | + "height": 300.0, |
307 | + "optionOpen": false, |
308 | + "keys": [ |
309 | + { |
310 | + "name": "timestamp", |
311 | + "index": 0.0, |
312 | + "aggr": "sum" |
313 | + } |
314 | + ], |
315 | + "values": [ |
316 | + { |
317 | + "name": "value", |
318 | + "index": 1.0, |
319 | + "aggr": "sum" |
320 | + } |
321 | + ], |
322 | + "groups": [], |
323 | + "scatter": { |
324 | + "xAxis": { |
325 | + "name": "timestamp", |
326 | + "index": 0.0, |
327 | + "aggr": "sum" |
328 | + }, |
329 | + "yAxis": { |
330 | + "name": "value", |
331 | + "index": 1.0, |
332 | + "aggr": "sum" |
333 | + } |
334 | + } |
335 | + }, |
336 | + "title": true |
337 | + }, |
338 | + "settings": { |
339 | + "params": { |
340 | + "maxDate": "\"Aug 20\"" |
341 | + }, |
342 | + "forms": { |
343 | + "maxDate": { |
344 | + "name": "maxDate", |
345 | + "defaultValue": "\"Aug 24\"", |
346 | + "hidden": false |
347 | + } |
348 | + } |
349 | + }, |
350 | + "jobName": "paragraph_1440163786226_421898739", |
351 | + "id": "20150821-082946_601268612", |
352 | + "result": { |
353 | + "code": "SUCCESS", |
354 | + "type": "TABLE", |
355 | + "msg": "timestamp\tvalue\nAug 21 11:20:45\t2\nAug 21 19:58:30\t2\nAug 24 21:59:47\t2\n" |
356 | + }, |
357 | + "dateCreated": "Aug 21, 2015 8:29:46 AM", |
358 | + "dateStarted": "Aug 24, 2015 10:54:54 PM", |
359 | + "dateFinished": "Aug 24, 2015 10:54:55 PM", |
360 | + "status": "FINISHED", |
361 | + "progressUpdateIntervalMs": 500 |
362 | + }, |
363 | + { |
364 | + "config": {}, |
365 | + "settings": { |
366 | + "params": {}, |
367 | + "forms": {} |
368 | + }, |
369 | + "jobName": "paragraph_1440473909272_653880463", |
370 | + "id": "20150824-223829_186145308", |
371 | + "dateCreated": "Aug 24, 2015 10:38:29 PM", |
372 | + "status": "READY", |
373 | + "progressUpdateIntervalMs": 500 |
374 | + } |
375 | + ], |
376 | + "name": "Real-time Analytic Tutorial", |
377 | + "id": "flume-tutorial", |
378 | + "angularObjects": {}, |
379 | + "config": { |
380 | + "looknfeel": "default" |
381 | + }, |
382 | + "info": {} |
383 | +} |
384 | |
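The flume-tutorial notebook added above walks through ingestion, processing, and visualization. Its core is the Scala paragraph that parses raw syslog lines from HDFS with a regular expression into SyslogMessage rows and registers them as a syslog temp table for the later %sql paragraphs. A rough PySpark equivalent of that parsing step, for comparison (the regex and HDFS glob are copied from the notebook; everything else is illustrative and not part of this merge):

import re
from pyspark import SparkContext
from pyspark.sql import SQLContext, Row

RE_SYSLOG = re.compile(
    r'^<\d+>([A-Za-z0-9, ]+\d{2}:\d{2}:\d{2}(?:\.\d{3})?)\s+'
    r'(\S+)\s+([^\[]+)\[(\d+)\]\s*:?\s*(.*)')

def parse(line):
    # Yield a Row per line that matches the syslog pattern; skip the rest.
    m = RE_SYSLOG.match(line)
    if m:
        ts, host, proc, pid, msg = m.groups()
        yield Row(timestamp=ts, host=host, process=proc.strip(),
                  pid=int(pid), message=msg)

sc = SparkContext(appName='syslog-parse')
sqlContext = SQLContext(sc)
events = sqlContext.createDataFrame(
    sc.textFile('/user/flume/flume-syslog/*/*/*').flatMap(parse))
events.registerTempTable('syslog')
# The notebook's first chart is then just:
#   select process, count(1) value from syslog group by process order by process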
385 | === added file 'resources/python/jujuresources-0.2.11.tar.gz' |
386 | Binary files resources/python/jujuresources-0.2.11.tar.gz 1970-01-01 00:00:00 +0000 and resources/python/jujuresources-0.2.11.tar.gz 2015-09-16 21:19:37 +0000 differ |
387 | === removed file 'resources/python/jujuresources-0.2.9.tar.gz' |
388 | Binary files resources/python/jujuresources-0.2.9.tar.gz 2015-06-29 21:07:04 +0000 and resources/python/jujuresources-0.2.9.tar.gz 1970-01-01 00:00:00 +0000 differ |
389 | === modified file 'tests/00-setup' |
390 | --- tests/00-setup 2015-05-05 03:25:30 +0000 |
391 | +++ tests/00-setup 2015-09-16 21:19:37 +0000 |
392 | @@ -1,5 +1,8 @@ |
393 | #!/bin/bash |
394 | |
395 | -sudo add-apt-repository ppa:juju/stable -y |
396 | -sudo apt-get update |
397 | -sudo apt-get install python3 amulet -y |
398 | +if ! dpkg -s amulet &> /dev/null; then |
399 | + echo Installing Amulet... |
400 | + sudo add-apt-repository -y ppa:juju/stable |
401 | + sudo apt-get update |
402 | + sudo apt-get -y install amulet |
403 | +fi |
404 | |
405 | === modified file 'tests/100-deploy-spark-hdfs-yarn' |
406 | --- tests/100-deploy-spark-hdfs-yarn 2015-08-25 02:14:22 +0000 |
407 | +++ tests/100-deploy-spark-hdfs-yarn 2015-09-16 21:19:37 +0000 |
408 | @@ -1,4 +1,4 @@ |
409 | -#!/usr/bin/python3 |
410 | +#!/usr/bin/env python3 |
411 | |
412 | import unittest |
413 | import amulet |
414 | @@ -6,18 +6,18 @@ |
415 | |
416 | class TestDeploy(unittest.TestCase): |
417 | """ |
418 | - Deployment test for Apache Spark using HDFS as shared storage and YARN as |
419 | - cluster job manager. |
420 | + Deployment test for Apache Spark+Zeppelin using HDFS as shared storage |
421 | + and YARN as cluster job manager. |
422 | """ |
423 | |
424 | @classmethod |
425 | def setUpClass(cls): |
426 | cls.d = amulet.Deployment(series='trusty') |
427 | # Deploy a hadoop cluster |
428 | - cls.d.add('yarn-master', charm='cs:~bigdata-dev/trusty/apache-hadoop-yarn-master') |
429 | - cls.d.add('hdfs-master', charm='cs:~bigdata-dev/trusty/apache-hadoop-hdfs-master') |
430 | - cls.d.add('compute-slave', charm='cs:~bigdata-dev/trusty/apache-hadoop-compute-slave', units=3) |
431 | - cls.d.add('plugin', charm='cs:~bigdata-dev/trusty/apache-hadoop-plugin') |
432 | + cls.d.add('yarn-master', charm='cs:trusty/apache-hadoop-yarn-master') |
433 | + cls.d.add('hdfs-master', charm='cs:trusty/apache-hadoop-hdfs-master') |
434 | + cls.d.add('compute-slave', charm='cs:trusty/apache-hadoop-compute-slave', units=3) |
435 | + cls.d.add('plugin', charm='cs:trusty/apache-hadoop-plugin') |
436 | cls.d.relate('yarn-master:namenode', 'hdfs-master:namenode') |
437 | cls.d.relate('compute-slave:nodemanager', 'yarn-master:nodemanager') |
438 | cls.d.relate('compute-slave:datanode', 'hdfs-master:datanode') |
439 | @@ -25,15 +25,15 @@ |
440 | cls.d.relate('plugin:namenode', 'hdfs-master:namenode') |
441 | |
442 | # Add Spark Service |
443 | - cls.d.add('spark', charm='cs:~bigdata-dev/trusty/apache-spark') |
444 | + cls.d.add('spark', charm='cs:trusty/apache-spark') |
445 | cls.d.relate('spark:hadoop-plugin', 'plugin:hadoop-plugin') |
446 | |
447 | # Add Apache Zeppelin |
448 | - cls.d.add('zeppelin', charm='cs:~bigdata-dev/trusty/apache-zeppelin') |
449 | + cls.d.add('zeppelin', charm='cs:trusty/apache-zeppelin') |
450 | cls.d.relate('zeppelin:spark', 'spark:spark') |
451 | |
452 | cls.d.setup(timeout=3600) |
453 | - cls.d.sentry.wait() |
454 | + cls.d.sentry.wait(timeout=3600) |
455 | cls.unit = cls.d.sentry.unit['zeppelin/0'] |
456 | |
457 | ########################################################################### |
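The excerpt ends inside TestDeploy's setUpClass; the rest of the file holds the test methods that drive the zeppelin/0 sentry unit. For readers unfamiliar with amulet, a check in the same style would look like the following (a hypothetical example, not part of this merge; the web UI port of 9090 is an assumption):

    def test_zeppelin_listening(self):
        # UnitSentry.run() executes a command on the unit and returns
        # its output and exit code.
        output, retcode = self.unit.run('netstat -lnt')
        self.assertTrue(':9090' in output,
                        'Zeppelin web UI does not appear to be listening')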