Merge lp:~bigdata-dev/charms/bundles/apache-hadoop-spark/trunk into lp:~charmers/charms/bundles/apache-hadoop-spark/bundle

Proposed by Kevin W Monroe
Status: Merged
Merged at revision: 5
Proposed branch: lp:~bigdata-dev/charms/bundles/apache-hadoop-spark/trunk
Merge into: lp:~charmers/charms/bundles/apache-hadoop-spark/bundle
Diff against target: 264 lines (+142/-16)
6 files modified
README.md (+1/-1)
bundle-dev.yaml (+1/-1)
bundle-local.yaml (+7/-7)
bundle.yaml (+7/-7)
tests/01-bundle.py (+123/-0)
tests/tests.yaml (+3/-0)
To merge this branch: bzr merge lp:~bigdata-dev/charms/bundles/apache-hadoop-spark/trunk
Reviewer: Kevin W Monroe, Status: Approve
Review via email: mp+286959@code.launchpad.net

Description of the change

Updates from bigdata-dev:
- bump the compute-slave memory constraint from 3G to 7G
- move the bundle.yaml charms to the latest promulgated versions (the ppc64le PoC is complete)
- add bundle tests (see the Amulet sketch below)
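
The added test follows the standard Amulet pattern: load bundle.yaml into an amulet.Deployment, wait for the units to report ready, then run commands on the unit sentries. A minimal sketch of that pattern (it mirrors what tests/01-bundle.py already does, nothing beyond it):

    import yaml
    import amulet

    # Load the bundle definition and hand it to Amulet, which drives the deployment.
    with open('bundle.yaml') as f:
        bundle = yaml.safe_load(f)

    d = amulet.Deployment(series='trusty')
    d.load(bundle)
    d.setup(timeout=1800)  # deploy the services and wait for the units to come up
    d.sentry.wait_for_messages({'spark': 'Ready'}, timeout=1800)

    # Each sentry exposes run() for executing a command on its unit, e.g. to
    # verify that the expected Java daemons are present.
    output, retcode = d.sentry['spark'][0].run('pgrep -a java')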

Kevin W Monroe (kwmonroe) wrote:

+1, AWS tests successful

review: Approve

Preview Diff

1=== modified file 'README.md'
2--- README.md 2015-06-26 17:04:43 +0000
3+++ README.md 2016-02-23 21:38:47 +0000
4@@ -14,7 +14,7 @@
5 ## Usage
6 Deploy this bundle using juju-quickstart:
7
8- juju quickstart u/bigdata-dev/apache-hadoop-spark
9+ juju quickstart apache-hadoop-spark
10
11 See `juju quickstart --help` for deployment options, including machine
12 constraints and how to deploy a locally modified version of the
13
14=== modified file 'bundle-dev.yaml'
15--- bundle-dev.yaml 2015-09-17 13:29:50 +0000
16+++ bundle-dev.yaml 2016-02-23 21:38:47 +0000
17@@ -5,7 +5,7 @@
18 annotations:
19 gui-x: "300"
20 gui-y: "200"
21- constraints: mem=3G
22+ constraints: mem=7G
23 hdfs-master:
24 charm: cs:~bigdata-dev/trusty/apache-hadoop-hdfs-master
25 num_units: 1
26
27=== modified file 'bundle-local.yaml'
28--- bundle-local.yaml 2015-09-17 13:29:50 +0000
29+++ bundle-local.yaml 2016-02-23 21:38:47 +0000
30@@ -1,39 +1,39 @@
31 services:
32 compute-slave:
33- charm: trusty/apache-hadoop-compute-slave
34+ charm: apache-hadoop-compute-slave
35 num_units: 3
36 annotations:
37 gui-x: "300"
38 gui-y: "200"
39- constraints: mem=3G
40+ constraints: mem=7G
41 hdfs-master:
42- charm: trusty/apache-hadoop-hdfs-master
43+ charm: apache-hadoop-hdfs-master
44 num_units: 1
45 annotations:
46 gui-x: "600"
47 gui-y: "350"
48 constraints: mem=7G
49 plugin:
50- charm: trusty/apache-hadoop-plugin
51+ charm: apache-hadoop-plugin
52 annotations:
53 gui-x: "900"
54 gui-y: "200"
55 secondary-namenode:
56- charm: trusty/apache-hadoop-hdfs-secondary
57+ charm: apache-hadoop-hdfs-secondary
58 num_units: 1
59 annotations:
60 gui-x: "600"
61 gui-y: "600"
62 constraints: mem=7G
63 spark:
64- charm: trusty/apache-spark
65+ charm: apache-spark
66 num_units: 1
67 annotations:
68 gui-x: "1200"
69 gui-y: "200"
70 constraints: mem=3G
71 yarn-master:
72- charm: trusty/apache-hadoop-yarn-master
73+ charm: apache-hadoop-yarn-master
74 num_units: 1
75 annotations:
76 gui-x: "600"
77
78=== modified file 'bundle.yaml'
79--- bundle.yaml 2015-09-17 13:29:50 +0000
80+++ bundle.yaml 2016-02-23 21:38:47 +0000
81@@ -1,39 +1,39 @@
82 services:
83 compute-slave:
84- charm: cs:trusty/apache-hadoop-compute-slave-8
85+ charm: cs:trusty/apache-hadoop-compute-slave-9
86 num_units: 3
87 annotations:
88 gui-x: "300"
89 gui-y: "200"
90- constraints: mem=3G
91+ constraints: mem=7G
92 hdfs-master:
93- charm: cs:trusty/apache-hadoop-hdfs-master-8
94+ charm: cs:trusty/apache-hadoop-hdfs-master-9
95 num_units: 1
96 annotations:
97 gui-x: "600"
98 gui-y: "350"
99 constraints: mem=7G
100 plugin:
101- charm: cs:trusty/apache-hadoop-plugin-7
102+ charm: cs:trusty/apache-hadoop-plugin-10
103 annotations:
104 gui-x: "900"
105 gui-y: "200"
106 secondary-namenode:
107- charm: cs:trusty/apache-hadoop-hdfs-secondary-6
108+ charm: cs:trusty/apache-hadoop-hdfs-secondary-7
109 num_units: 1
110 annotations:
111 gui-x: "600"
112 gui-y: "600"
113 constraints: mem=7G
114 spark:
115- charm: cs:trusty/apache-spark-3
116+ charm: cs:trusty/apache-spark-6
117 num_units: 1
118 annotations:
119 gui-x: "1200"
120 gui-y: "200"
121 constraints: mem=3G
122 yarn-master:
123- charm: cs:trusty/apache-hadoop-yarn-master-6
124+ charm: cs:trusty/apache-hadoop-yarn-master-7
125 num_units: 1
126 annotations:
127 gui-x: "600"
128
129=== added directory 'tests'
130=== added file 'tests/01-bundle.py'
131--- tests/01-bundle.py 1970-01-01 00:00:00 +0000
132+++ tests/01-bundle.py 2016-02-23 21:38:47 +0000
133@@ -0,0 +1,123 @@
134+#!/usr/bin/env python3
135+
136+import os
137+import unittest
138+
139+import yaml
140+import amulet
141+
142+
143+class TestBundle(unittest.TestCase):
144+ bundle_file = os.path.join(os.path.dirname(__file__), '..', 'bundle.yaml')
145+
146+ @classmethod
147+ def setUpClass(cls):
148+ cls.d = amulet.Deployment(series='trusty')
149+ with open(cls.bundle_file) as f:
150+ bun = f.read()
151+ bundle = yaml.safe_load(bun)
152+ cls.d.load(bundle)
153+ cls.d.setup(timeout=1800)
154+ cls.d.sentry.wait_for_messages({'spark': 'Ready'}, timeout=1800)
155+ cls.hdfs = cls.d.sentry['hdfs-master'][0]
156+ cls.yarn = cls.d.sentry['yarn-master'][0]
157+ cls.slave = cls.d.sentry['compute-slave'][0]
158+ cls.secondary = cls.d.sentry['secondary-namenode'][0]
159+ cls.spark = cls.d.sentry['spark'][0]
160+
161+ def test_components(self):
162+ """
163+ Confirm that all of the required components are up and running.
164+ """
165+ hdfs, retcode = self.hdfs.run("pgrep -a java")
166+ yarn, retcode = self.yarn.run("pgrep -a java")
167+ slave, retcode = self.slave.run("pgrep -a java")
168+ secondary, retcode = self.secondary.run("pgrep -a java")
169+ spark, retcode = self.spark.run("pgrep -a java")
170+
171+ # .NameNode needs the . to differentiate it from SecondaryNameNode
172+ assert '.NameNode' in hdfs, "NameNode not started"
173+ assert '.NameNode' not in yarn, "NameNode should not be running on yarn-master"
174+ assert '.NameNode' not in slave, "NameNode should not be running on compute-slave"
175+ assert '.NameNode' not in secondary, "NameNode should not be running on secondary-namenode"
176+ assert '.NameNode' not in spark, "NameNode should not be running on spark"
177+
178+ assert 'ResourceManager' in yarn, "ResourceManager not started"
179+ assert 'ResourceManager' not in hdfs, "ResourceManager should not be running on hdfs-master"
180+ assert 'ResourceManager' not in slave, "ResourceManager should not be running on compute-slave"
181+ assert 'ResourceManager' not in secondary, "ResourceManager should not be running on secondary-namenode"
182+ assert 'ResourceManager' not in spark, "ResourceManager should not be running on spark"
183+
184+ assert 'JobHistoryServer' in yarn, "JobHistoryServer not started"
185+ assert 'JobHistoryServer' not in hdfs, "JobHistoryServer should not be running on hdfs-master"
186+ assert 'JobHistoryServer' not in slave, "JobHistoryServer should not be running on compute-slave"
187+ assert 'JobHistoryServer' not in secondary, "JobHistoryServer should not be running on secondary-namenode"
188+ assert 'JobHistoryServer' not in spark, "JobHistoryServer should not be running on spark"
189+
190+ assert 'NodeManager' in slave, "NodeManager not started"
191+ assert 'NodeManager' not in yarn, "NodeManager should not be running on yarn-master"
192+ assert 'NodeManager' not in hdfs, "NodeManager should not be running on hdfs-master"
193+ assert 'NodeManager' not in secondary, "NodeManager should not be running on secondary-namenode"
194+ assert 'NodeManager' not in spark, "NodeManager should not be running on spark"
195+
196+ assert 'DataNode' in slave, "DataNode not started"
197+ assert 'DataNode' not in yarn, "DataNode should not be running on yarn-master"
198+ assert 'DataNode' not in hdfs, "DataNode should not be running on hdfs-master"
199+ assert 'DataNode' not in secondary, "DataNode should not be running on secondary-namenode"
200+ assert 'DataNode' not in spark, "DataNode should not be running on spark"
201+
202+ assert 'SecondaryNameNode' in secondary, "SecondaryNameNode not started"
203+ assert 'SecondaryNameNode' not in yarn, "SecondaryNameNode should not be running on yarn-master"
204+ assert 'SecondaryNameNode' not in hdfs, "SecondaryNameNode should not be running on hdfs-master"
205+ assert 'SecondaryNameNode' not in slave, "SecondaryNameNode should not be running on compute-slave"
206+ assert 'SecondaryNameNode' not in spark, "SecondaryNameNode should not be running on spark"
207+
208+ assert 'spark' in spark, 'Spark should be running on spark'
209+
210+ def test_hdfs_dir(self):
211+ """
212+ Validate a few admin Hadoop activities on the HDFS cluster.
213+ 1) This test validates mkdir on hdfs cluster
214+ 2) This test validates change hdfs dir owner on the cluster
215+ 3) This test validates setting hdfs directory access permission on the cluster
216+
217+ NB: These are order-dependent, so must be done as part of a single test case.
218+ """
219+ output, retcode = self.spark.run("su hdfs -c 'hdfs dfs -mkdir -p /user/ubuntu'")
220+ assert retcode == 0, "Created a user directory on hdfs FAILED:\n{}".format(output)
221+ output, retcode = self.spark.run("su hdfs -c 'hdfs dfs -chown ubuntu:ubuntu /user/ubuntu'")
222+ assert retcode == 0, "Assigning an owner to hdfs directory FAILED:\n{}".format(output)
223+ output, retcode = self.spark.run("su hdfs -c 'hdfs dfs -chmod -R 755 /user/ubuntu'")
224+ assert retcode == 0, "Setting directory permission on hdfs FAILED:\n{}".format(output)
225+
226+ def test_yarn_mapreduce_exe(self):
227+ """
228+ Validate yarn mapreduce operations:
229+ 1) validate mapreduce execution - writing to hdfs
230+ 2) validate successful mapreduce operation after the execution
231+ 3) validate mapreduce execution - reading and writing to hdfs
232+ 4) validate successful mapreduce operation after the execution
233+ 5) validate successful deletion of mapreduce operation result from hdfs
234+
235+ NB: These are order-dependent, so must be done as part of a single test case.
236+ """
237+ jar_file = '/usr/lib/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar'
238+ test_steps = [
239+ ('teragen', "su ubuntu -c 'hadoop jar {} teragen 10000 /user/ubuntu/teragenout'".format(jar_file)),
240+ ('mapreduce #1', "su hdfs -c 'hdfs dfs -ls /user/ubuntu/teragenout/_SUCCESS'"),
241+ ('terasort', "su ubuntu -c 'hadoop jar {} terasort /user/ubuntu/teragenout /user/ubuntu/terasortout'".
242+ format(jar_file)),
243+ ('mapreduce #2', "su hdfs -c 'hdfs dfs -ls /user/ubuntu/terasortout/_SUCCESS'"),
244+ ('cleanup', "su hdfs -c 'hdfs dfs -rm -r /user/ubuntu/teragenout'"),
245+ ]
246+ for name, step in test_steps:
247+ output, retcode = self.spark.run(step)
248+ assert retcode == 0, "{} FAILED:\n{}".format(name, output)
249+
250+ def test_spark(self):
251+ output, retcode = self.spark.run("su ubuntu -c 'bash -lc /home/ubuntu/sparkpi.sh 2>&1'")
252+ assert 'Pi is roughly' in output, 'SparkPI test failed: %s' % output
253+
254+
255+if __name__ == '__main__':
256+ unittest.main()
257
258=== added file 'tests/tests.yaml'
259--- tests/tests.yaml 1970-01-01 00:00:00 +0000
260+++ tests/tests.yaml 2016-02-23 21:38:47 +0000
261@@ -0,0 +1,3 @@
262+reset: false
263+packages:
264+ - amulet
