Merge lp:~bigdata-dev/charms/trusty/hdp-hive/hive-fixes into lp:charms/trusty/hdp-hive

Proposed by amir sanjar
Status: Needs review
Proposed branch: lp:~bigdata-dev/charms/trusty/hdp-hive/hive-fixes
Merge into: lp:charms/trusty/hdp-hive
Diff against target: 247 lines (+78/-13)
5 files modified
README.md (+2/-1)
hooks/hdp-hive-common.py (+55/-4)
metadata.yaml (+3/-1)
tests/01-deploy-hive-cluster (+12/-3)
tests/hive-hadoop-sql-cluster.yaml (+6/-4)
To merge this branch: bzr merge lp:~bigdata-dev/charms/trusty/hdp-hive/hive-fixes
Reviewer Review Type Date Requested Status
Charles Butler (community) Needs Fixing
Review via email: mp+243342@code.launchpad.net

Description of the change

Adding Elasticsearch and OpenStack support to Hive.

To post a comment you must log in.
Revision history for this message
Charles Butler (lazypower) wrote :

Thanks for the proposed fixes Amir, I have however run into an issue with the MP - it appears there is an issue with the amulet tests:

The hive tests are failing during testing:
2014-12-08 13:46:45 Adding relation hdp-hive:db <-> mysql:db
2014-12-08 13:47:51 Deployment complete in 3444.33 seconds
 Hive Create table FAILED

Upon further investigation I see that hive is indeed connected to hadoop - and i was able to get an HDFS listing. So it appears the issue is with the test and not with the deployment, as the command works when being run locally on the node:

hive> create table test(col1 int, col2 string);
OK
Time taken: 2.465 seconds

Thanks for the submission, keep up the great work

review: Needs Fixing

Unmerged revisions

15. By amir sanjar

Merge Elasticsearch + OpenStack with trunk

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'README.md'
2--- README.md 2014-09-10 20:48:42 +0000
3+++ README.md 2014-12-01 23:23:55 +0000
4@@ -63,6 +63,7 @@
5 juju add-relation hdphive:namenode yarn-hdfs-master:namenode
6 # associate Hive with resourcemanager
7 juju add-relation hdphive:resourcemanager yarn-hdfs-master:resourcemanager
8+ juju add-relation compute-node:hadoop-nodes hdphive:hadoop-nodes
9
10 # Smoke Test
11 ## Usage
12@@ -72,7 +73,7 @@
13 hadoop file system - Below steps verifies/demos HDFS functionality
14 a) sudo su $HDFS_USER
15 b) hdfs dfs -mkdir -p /user/ubuntu
16- c) hdfs dfs -chown ubuntu:ubuntu /user/ubuntu
17+ c) hdfs dfs -chown -R ubuntu:hdfs /user
18 d) hdfs dfs -chmod -R 755 /user/ubuntu
19 e) exit
20
21
22=== added directory 'files/data'
23=== added file 'files/data/elasticsearch-hadoop-2.0.0.zip'
24Binary files files/data/elasticsearch-hadoop-2.0.0.zip 1970-01-01 00:00:00 +0000 and files/data/elasticsearch-hadoop-2.0.0.zip 2014-12-01 23:23:55 +0000 differ
25=== added symlink 'hooks/elasticsearch-relation-joined'
26=== target is u'hdp-hive-common.py'
27=== removed symlink 'hooks/elk-relation-joined'
28=== target was u'hdp-hive-common.py'
29=== added symlink 'hooks/hadoop-nodes-relation-changed'
30=== target is u'hdp-hive-common.py'
31=== modified file 'hooks/hdp-hive-common.py'
32--- hooks/hdp-hive-common.py 2014-08-22 10:54:53 +0000
33+++ hooks/hdp-hive-common.py 2014-12-01 23:23:55 +0000
34@@ -15,6 +15,8 @@
35 from charmhelpers.core.hookenv import log, Hooks, relation_get, relation_set, unit_get, open_port
36 from charmhelpers.core.host import service_start, service_stop, add_user_to_group
37 from time import sleep
38+from charmhelpers.lib.utils import get_unit_hostname
39+
40
41
42 # Required for unit tests... :(
43@@ -79,6 +81,9 @@
44 cmd = shlex.split("su hdfs -c '{} fs {}'".format(hadoopPath, command))
45 subprocess.call(cmd)
46
47+def callHDFS(command):
48+ cmd = shlex.split("su {u} -c 'hdfs {c}'".format(u="hdfs", c=command))
49+ return subprocess.check_output(cmd)
50
51 def configurMySQLClient():
52 host = relation_get('host')
53@@ -120,7 +125,20 @@
54 sleep(5)
55 start_hive_metastore()
56 start_hive_server2()
57-
58+
59+def waitForHDFSReady(namenode_ip):
60+ import time
61+ ticks = time.time()
62+ while True:
63+ if (time.time() - ticks) > 4000:
64+ log("spark ==> Reached datanode timeout value..", "ERROR")
65+ sys.exit(1)
66+ o = callHDFS("dfsadmin -D fs.defaultFS={} -report".format("hdfs://"+namenode_ip+":8020"))
67+ if o.find("Live datanodes:") == -1:
68+ time.sleep(2)
69+ log("spark ==> damenode not ready","INFO")
70+ continue
71+ break
72
73 #################################### Global Data ################################
74
75@@ -131,6 +149,7 @@
76 hdpScript = "hdp_scripts"
77 hdpScriptPath = os.path.join(os.path.sep,home, hdpScript,'scripts')
78 bashrc = os.path.join(os.path.sep, home, '.bashrc')
79+hosts_path = os.path.join(os.path.sep, 'etc', 'hosts')
80 # hadoopMemoryOptimizationData = os.path.join(os.path.sep, hdpScriptPath, "hdpMemOpt.txt");
81
82 ##########################################################################################
83@@ -147,6 +166,7 @@
84 'openjdk-7-jdk',
85 'hive',
86 'hive-hcatalog',
87+ 'unzip',
88 'mysql-connector-java*']
89 install_base_pkg(packages)
90 config_hive_nodes()
91@@ -154,6 +174,9 @@
92 init_derby_Metastore_database_schema()
93 start_hive_metastore()
94 start_hive_server2()
95+ fileSetKV(hosts_path, unit_get('private-address')+' ', get_unit_hostname())
96+ add_user_to_group("ubuntu", "hadoop")
97+ add_user_to_group(os.environ['HIVE_USER'], "hadoop")
98
99
100 @hooks.hook('resourcemanager-relation-changed')
101@@ -184,15 +207,19 @@
102 if not nameNodeReady:
103 log("HIVE ==> namenode not ready- Changed phase","INFO")
104 sys.exit(0)
105- log("HIVE ==> namenode_IP={}".format(relation_get('private-address')),"INFO")
106+ namenode_ip = relation_get('private-address')
107+ log("HIVE ==> namenode_IP={}".format(namenode_ip),"INFO")
108 setHadoopEnvVar()
109+ waitForHDFSReady(namenode_ip)
110 hdfsConfPath = os.path.join(os.path.sep, os.environ['HADOOP_CONF_DIR'],"core-site.xml")
111- setHadoopConfigXML(hdfsConfPath, "fs.defaultFS", "hdfs://"+relation_get('private-address')+":8020")
112+ setHadoopConfigXML(hdfsConfPath, "fs.defaultFS", "hdfs://"+namenode_ip+":8020")
113+ namenode_hostname = relation_get("namenode_hostname")
114+ fileSetKV(hosts_path, namenode_ip+' ', namenode_hostname)
115 hive_user = os.environ['HIVE_USER']
116 hdfs_user = os.environ['HDFS_USER']
117 #Create the Hive user home directory on HDFS.
118 callHDFS_fs("-mkdir -p /user/{}".format(hive_user))
119- callHDFS_fs('-chown {h}:{d} /user/{h}'.format(h=hive_user, d=hdfs_user))
120+ callHDFS_fs('-chown -R {h}:{d} /user'.format(h=hive_user, d=hdfs_user))
121 # Create the warehouse directory on HDFS.
122 callHDFS_fs('-mkdir -p /apps/hive/warehouse')
123 callHDFS_fs('-chown -R {h}:{d} /apps/hive'.format(h=hive_user, d=hdfs_user))
124@@ -203,6 +230,13 @@
125 callHDFS_fs("-chmod -R 777 /tmp/scratch ")
126 restart_hive_server()
127
128+@hooks.hook('hadoop-nodes-relation-changed')
129+def hadoop_nodes_relation_changed():
130+ compute_ip = relation_get('private-address')
131+ hostname = relation_get('hostname')
132+ log("==> Connect to hadoop compute node {}={}".format(compute_ip, hostname),"INFO")
133+ if hostname != None:
134+ fileSetKV(hosts_path, compute_ip+' ', hostname)
135
136 @hooks.hook('config-changed')
137 def config_changed():
138@@ -234,6 +268,23 @@
139 init_MYSQL_Metastore_database_schema()
140 start_hive_metastore()
141 start_hive_server2()
142+
143+@hooks.hook('elasticsearch-relation-joined')
144+def elasticsearch_relation_joined():
145+ elsZipfile = os.path.join(os.path.sep, os.environ['CHARM_DIR'],'files', 'data',
146+ "elasticsearch-hadoop-2.0.0.zip")
147+ hive_lib = os.path.join(os.path.sep, 'usr', 'lib', 'hive', 'lib')
148+ os.chdir(home)
149+ cmd = "unzip {}".format(elsZipfile)
150+ subprocess.call(cmd.split())
151+ srcpath = os.path.join(os.path.sep, home, "elasticsearch-hadoop-2.0.0", 'dist')
152+ source = os.listdir(srcpath)
153+ for files in source:
154+ srcFile = os.path.join(os.path.sep, srcpath, files)
155+ desFile = os.path.join(os.path.sep, hive_lib, files)
156+ copyfile(srcFile, desFile)
157+
158+
159
160 if __name__ == "__main__":
161 hooks.execute(sys.argv)
162
163=== modified file 'metadata.yaml'
164--- metadata.yaml 2014-08-18 10:05:16 +0000
165+++ metadata.yaml 2014-12-01 23:23:55 +0000
166@@ -28,5 +28,7 @@
167 interface: hive
168 db:
169 interface: mysql
170- elk:
171+ elasticsearch:
172 interface: elasticsearch
173+ hadoop-nodes:
174+ interface: mapred
175
176=== modified file 'tests/01-deploy-hive-cluster'
177--- tests/01-deploy-hive-cluster 2014-09-19 18:26:01 +0000
178+++ tests/01-deploy-hive-cluster 2014-12-01 23:23:55 +0000
179@@ -30,7 +30,7 @@
180
181 def test_remote_hdfs_status(self):
182 o,c= self.hive_unit.run("sudo su hdfs -c ' hdfs dfsadmin -report'")
183- if o.find('Datanodes available: 1') == -1:
184+ if o.find('Datanodes available: 2') == -1:
185 amulet.raise_status(amulet.FAIL, msg="Remote HDFS not available")
186
187 def test_remote_yarn_status(self):
188@@ -43,8 +43,17 @@
189 o,c= self.hive_unit.run("su hdfs -c 'hdfs dfs -mkdir -p /user/hduser'")
190 if c != 0:
191 amulet.raise_status(amulet.FAIL, msg=" Created a user directory on hdfs FAILED")
192-
193-
194+
195+ def test_hive(self):
196+ o,c= self.hive_unit.run("su hive -c 'hive -e \"create table test(col1 int, col2 string);\"'")
197+ if o.find("OK") == -1:
198+ amulet.raise_status(amulet.FAIL, msg=" Hive Create table FAILED")
199+ print(o)
200+ o,c= self.hive_unit.run("su hive -c 'hive -e \"show tables;\"'")
201+ if o.find("test") == -1:
202+ amulet.raise_status(amulet.FAIL, msg=" Hive show table FAILED")
203+ print(o)
204+
205
206 if __name__ == '__main__':
207 runner = TestDeployment()
208
209=== modified file 'tests/hive-hadoop-sql-cluster.yaml'
210--- tests/hive-hadoop-sql-cluster.yaml 2014-09-18 01:41:58 +0000
211+++ tests/hive-hadoop-sql-cluster.yaml 2014-12-01 23:23:55 +0000
212@@ -1,8 +1,8 @@
213 hdp-hadoop-hive-mysql:
214 services:
215 "compute-node":
216- charm: "cs:~asanjar/trusty/hdp-hadoop"
217- num_units: 1
218+ charm: "hdp-hadoop"
219+ num_units: 2
220 annotations:
221 "gui-x": "768"
222 "gui-y": "591.0585428804295"
223@@ -15,13 +15,13 @@
224 "gui-x": "1102.9983551210835"
225 "gui-y": "591.0585428804295"
226 hdphive:
227- charm: "cs:~asanjar/trusty/hdp-hive"
228+ charm: "hdp-hive"
229 num_units: 1
230 annotations:
231 "gui-x": "1105.4991775605417"
232 "gui-y": "284.9414571195705"
233 "yarn-hdfs-master":
234- charm: "cs:~asanjar/trusty/hdp-hadoop"
235+ charm: "hdp-hadoop"
236 num_units: 1
237 annotations:
238 "gui-x": "769.0016448789165"
239@@ -35,6 +35,8 @@
240 - "yarn-hdfs-master:resourcemanager"
241 - - "yarn-hdfs-master:resourcemanager"
242 - "compute-node:nodemanager"
243+ - - "compute-node:hadoop-nodes"
244+ - "hdphive:hadoop-nodes"
245 - - "hdphive:db"
246 - "mysql:db"
247 series: trusty

Subscribers

People subscribed via source and target branches