Merge lp:~ewollesen/charms/trusty/apache-spark/spark-config into lp:~bigdata-dev/charms/trusty/apache-spark/trunk

Proposed by Eric Wollesen
Status: Needs review
Proposed branch: lp:~ewollesen/charms/trusty/apache-spark/spark-config
Merge into: lp:~bigdata-dev/charms/trusty/apache-spark/trunk
Diff against target: 235 lines (+125/-17)
4 files modified
config.yaml (+17/-1)
hooks/callbacks.py (+20/-16)
hooks/eawutils.py (+45/-0)
tests/100-spark-config (+43/-0)
To merge this branch: bzr merge lp:~ewollesen/charms/trusty/apache-spark/spark-config
Reviewer: Juju Big Data Development
Review status: Pending
Review via email: mp+260782@code.launchpad.net

Description of the change

Adds config options for +spark_local_dir+ and +spark_driver_cores+.
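A minimal usage sketch (assuming the rendering behaviour implemented in
hooks/eawutils.py below, not part of the proposal text): with the branch
deployed, the options would be set via the juju 1.x CLI, e.g.

    juju set spark spark_driver_cores=2 spark_local_dir=/var

and the charm is then expected to write them as dotted, tab-separated entries
in spark-defaults.conf:

    spark.driver.cores	2
    spark.local.dir	/var

The dotted key names and the tab separator follow the underscore-to-dot
conversion in hooks/eawutils.py and the grep in tests/100-spark-config.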

Unmerged revisions

15. By Eric Wollesen

Take two at a spark config amulet test.

The amulet configure method isn't working as I would expect. It doesn't
appear to be triggering the juju hooks that effect the requested
changes. As a result, the values aren't being written to disk, and
the test fails. Modifying values via juju set, however, works fine.

14. By Eric Wollesen

Re-arrange the install callback, so config-changed works

13. By Eric Wollesen

Merged ~bigdata-dev's trunk

12. By Eric Wollesen

Adds configuration for driver cores and local dir.

See config.yaml for details.

Preview Diff

=== modified file 'config.yaml'
--- config.yaml 2015-05-30 04:34:18 +0000
+++ config.yaml 2015-06-03 05:31:01 +0000
@@ -4,4 +4,20 @@
     default: ''
     description: |
       URL from which to fetch resources (e.g., Hadoop binaries) instead of Launchpad.
-
+  spark_driver_cores:
+    type: int
+    default: 1
+    description: |
+      Number of cores to use for the driver process, only in cluster
+      mode.
+  spark_local_dir:
+    type: string
+    default: /tmp
+    description: |
+      Directory to use for "scratch" space in Spark, including map
+      output files and RDDs that get stored on disk. This should be on a
+      fast, local disk in your system. It can also be a comma-separated
+      list of multiple directories on different disks. NOTE: In Spark
+      1.0 and later this will be overriden by SPARK_LOCAL_DIRS
+      (Standalone, Mesos) or LOCAL_DIRS (YARN) environment variables set
+      by the cluster manager.
=== modified file 'hooks/callbacks.py'
--- hooks/callbacks.py 2015-06-02 13:45:28 +0000
+++ hooks/callbacks.py 2015-06-03 05:31:01 +0000
@@ -1,4 +1,3 @@
-
 from subprocess import check_output, Popen
 
 import jujuresources
@@ -6,7 +5,7 @@
 from charmhelpers.core import unitdata
 from charmhelpers.contrib.bigdata import utils
 from path import Path
-
+import eawutils
 
 class Spark(object):
 
@@ -18,8 +17,11 @@
         return unitdata.kv().get('spark.installed')
 
     def install(self, force=False):
-        if not force and self.is_installed():
-            return
+        if force or not self.is_installed():
+            install_spark()
+        self.configure_spark()
+
+    def install_spark(self):
         mirror_url = hookenv.config()['resources_mirror']
         jujuresources.fetch('spark-%s' % self.cpu_arch, mirror_url=mirror_url)
         jujuresources.install('spark-%s' % self.cpu_arch,
@@ -29,9 +31,8 @@
         self.dist_config.add_dirs()
         self.dist_config.add_packages()
         self.setup_spark_config()
-        self.configure_spark()
         unitdata.kv().set('spark.installed', True)
-        
+
     def install_demo(self):
         '''
         Install demo.sh script to /home/ubuntu
@@ -42,11 +43,11 @@
         Path(demo_source).copy(demo_target)
         Path(demo_target).chmod(0o755)
         Path(demo_target).chown('ubuntu', 'hadoop')
-        
+
     def setup_spark_config(self):
         '''
         copy Spark's default configuration files to spark_conf property defined
-        in dist.yaml 
+        in dist.yaml
         '''
         conf_dir = self.dist_config.path('spark') / 'conf'
         self.dist_config.path('spark_conf').rmtree_p()
@@ -64,10 +65,10 @@
         utils.re_edit_in_place(spark_log4j, {
             r'log4j.rootCategory=INFO, console': 'log4j.rootCategory=ERROR, console',
         })
-        
+
     def configure_spark(self):
         '''
-        Configure spark environment for all users 
+        Configure spark environment for all users
         '''
         from subprocess import call
         spark_bin = self.dist_config.path('spark') / 'bin'
@@ -78,12 +79,15 @@
         env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
         self.configure_spark_hdfs()
         self.spark_optimize()
+        spark_default = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
+        spark_config = eawutils.getSparkConfig(hookenv.config())
+        eawutils.updateSparkConfig(spark_default, spark_config)
         cmd = "chown -R ubuntu:hadoop {}".format (spark_home)
         call(cmd.split())
         cmd = "chown -R ubuntu:hadoop {}".format (self.dist_config.path('spark_conf'))
         call(cmd.split())
-        
-    def configure_spark_hdfs(self): 
+
+    def configure_spark_hdfs(self):
         e = utils.read_etc_env()
         utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', '/user/ubuntu/directory', env=e)
         utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop', '/user/ubuntu/directory', env=e)
@@ -107,19 +111,19 @@
             r'.*spark.eventLog.enabled *.*':'spark.eventLog.enabled true',
             r'.*spark.eventLog.dir *.*':'spark.eventLog.dir hdfs:///user/ubuntu/directory',
             })
-        
-        
+
+
     def start(self):
         e = utils.read_etc_env()
         spark_home = self.dist_config.path('spark')
         if utils.jps("HistoryServer"):
             self.stop()
         utils.run_as('ubuntu', '{}/sbin/start-history-server.sh'.format(spark_home), 'hdfs:///user/ubuntu/directory', env=e)
-        
+
     def stop(self):
         e = utils.read_etc_env()
         spark_home = self.dist_config.path('spark')
-        utils.run_as('ubuntu', '{}/sbin/stop-history-server.sh'.format(spark_home), env=e) 
+        utils.run_as('ubuntu', '{}/sbin/stop-history-server.sh'.format(spark_home), env=e)
 
     def cleanup(self):
         self.dist_config.remove_dirs()
 
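A minimal sketch (stubbed class, not from the branch) of why the install()
rearrangement matters: configure_spark() now runs on every call, so a
config-changed hook that re-invokes install() re-renders the Spark
configuration even when Spark is already installed, while the heavy
fetch/install step is still skipped.

    # Illustrative stub only; method names mirror hooks/callbacks.py, the
    # bodies do not.
    class FakeSpark(object):
        def __init__(self):
            self.installed = False
            self.calls = []

        def is_installed(self):
            return self.installed

        def install_spark(self):
            self.calls.append('install_spark')
            self.installed = True

        def configure_spark(self):
            self.calls.append('configure_spark')

        def install(self, force=False):
            # Proposed flow: skip the heavy install when possible, but always
            # re-apply configuration.
            if force or not self.is_installed():
                self.install_spark()
            self.configure_spark()

    spark = FakeSpark()
    spark.install()     # install hook: installs, then configures
    spark.install()     # config-changed hook: only re-configures
    print(spark.calls)  # ['install_spark', 'configure_spark', 'configure_spark']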
=== added file 'hooks/eawutils.py'
--- hooks/eawutils.py 1970-01-01 00:00:00 +0000
+++ hooks/eawutils.py 2015-06-03 05:31:01 +0000
@@ -0,0 +1,45 @@
+# These functions should live in charmhelpers.contrib.bigdata.utils or
+# somewhere similar.
+import re
+from charmhelpers.contrib.bigdata import utils
+
+def updateSparkConfig(path, config):
+    """Updates spark config settings in +path+.
+
+    Assumes +path+ is in spark config file syntax."""
+    inserts, updates = calcSparkConfigUpserts(path, config)
+
+    utils.re_edit_in_place(path, updates)
+    with open(path, 'a') as configFile:
+        for item in inserts.items():
+            configFile.write("%s\t%s\n" % item)
+
+def calcSparkConfigUpserts(path, config):
+    """Calculate upserts to transform +path+ to +config+, idempotently.
+
+    Returns (inserts, updates)."""
+    inserts = config.copy()
+    updates = {}
+
+    with open(path, 'r') as configFile:
+        for line in configFile.readlines():
+            if line.startswith("#") or re.match('\A\s*\Z', line):
+                continue
+            key = line.split(None, 1)[0]
+            if key in config:
+                updates["^%s\s.*" % key] = "%s\t%s" % (key, config[key])
+                inserts.pop(key)
+
+    return inserts, updates
+
+def getKeysStartingWith(d, prefix):
+    "Return a dict of the keys prefixed with +prefix+."
+    return dict([(k,v) for k,v in d.items() if k.startswith(prefix)])
+
+def underscoreToDot(d):
+    "Return the dictionary with underscores in keys replaced with dots."
+    return dict([(k.replace("_", "."),v) for k,v in d.items()])
+
+def getSparkConfig(config):
+    "Return a dict of the keys prefixed with 'spark.', that have non-default values."
+    return underscoreToDot(getKeysStartingWith(config, "spark_"))
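A self-contained sketch of the upsert behaviour above, using only the standard
library (the branch delegates the in-place edit to
charmhelpers.contrib.bigdata.utils.re_edit_in_place; the function names here
are illustrative, not part of the proposal):

    import tempfile

    def spark_config_from_charm_config(config):
        # Map charm options such as 'spark_driver_cores' to 'spark.driver.cores'.
        return dict((k.replace('_', '.'), v) for k, v in config.items()
                    if k.startswith('spark_'))

    def upsert_spark_defaults(path, spark_config):
        # Rewrite existing keys in place and append any that are missing.
        inserts = dict(spark_config)
        lines = []
        with open(path) as f:
            for raw in f:
                line = raw.rstrip('\n')
                if line and not line.startswith('#'):
                    key = line.split(None, 1)[0]
                    if key in inserts:
                        line = '%s\t%s' % (key, inserts.pop(key))
                lines.append(line)
        lines.extend('%s\t%s' % item for item in inserts.items())
        with open(path, 'w') as f:
            f.write('\n'.join(lines) + '\n')

    if __name__ == '__main__':
        with tempfile.NamedTemporaryFile('w', suffix='.conf', delete=False) as f:
            f.write('# Default system properties included when running spark-submit.\n')
            f.write('spark.driver.cores\t1\n')
            path = f.name
        charm_config = {'spark_driver_cores': 2, 'spark_local_dir': '/var',
                        'resources_mirror': ''}
        upsert_spark_defaults(path, spark_config_from_charm_config(charm_config))
        print(open(path).read())
        # spark.driver.cores is rewritten to 2, spark.local.dir is appended.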
=== added file 'tests/100-spark-config'
--- tests/100-spark-config 1970-01-01 00:00:00 +0000
+++ tests/100-spark-config 2015-06-03 05:31:01 +0000
@@ -0,0 +1,43 @@
+#!/usr/bin/python3
+
+import unittest
+import amulet
+
+
+class TestSparkConfig(unittest.TestCase):
+    """
+    Configuration settings test for Apache Spark.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        cls.d = amulet.Deployment(series='trusty')
+        #### Deploy a hadoop cluster
+        cls.d.add('yarn-master', charm='cs:~bigdata-dev/trusty/apache-hadoop-yarn-master')
+        cls.d.add('hdfs-master', charm='cs:~bigdata-dev/trusty/apache-hadoop-hdfs-master')
+        cls.d.add('compute-slave', charm='cs:~bigdata-dev/trusty/apache-hadoop-compute-slave', units=2)
+        cls.d.add('hadoop-plugin', charm='cs:~bigdata-dev/trusty/apache-hadoop-plugin')
+        cls.d.relate('yarn-master:namenode', 'hdfs-master:namenode')
+        cls.d.relate('yarn-master:resourcemanager', 'hadoop-plugin:resourcemanager')
+        cls.d.relate('hadoop-plugin:namenode', 'hdfs-master:namenode')
+
+        cls.d.relate('compute-slave:nodemanager', 'yarn-master:nodemanager')
+        cls.d.relate('compute-slave:datanode', 'hdfs-master:datanode')
+
+        ### Add Spark Service
+        cls.d.add('spark', 'apache-spark')
+        cls.d.configure('spark', {'spark_driver_cores': 2,
+                                  'spark_local_dir': '/var'})
+        cls.d.relate('hadoop-plugin:hadoop-plugin', 'spark:hadoop-plugin')
+
+        cls.d.setup(timeout=9000)
+        cls.d.sentry.wait()
+        cls.unit = cls.d.sentry.unit['spark/0']
+
+    def test_config_setting(self):
+        output, retcode = self.unit.run("grep -Pq 'spark.driver.cores\t2' /etc/spark/conf/spark-defaults.conf")
+        self.assertEqual(retcode, 0, 'failed to configure spark service\n')
+
+
+if __name__ == '__main__':
+    unittest.main()
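As noted in revision 15 above, amulet's Deployment.configure() did not appear
to trigger the config-changed hook, while "juju set" did. A hedged sketch of
that workaround (an assumption, not part of the branch), using the same
service and option names as the test:

    import subprocess

    def set_spark_config(options, service='spark'):
        # Apply charm config via the juju 1.x CLI so config-changed fires:
        #   juju set <service> key=value [key=value ...]
        args = ['%s=%s' % (k, v) for k, v in sorted(options.items())]
        subprocess.check_call(['juju', 'set', service] + args)

    # Example, mirroring tests/100-spark-config:
    # set_spark_config({'spark_driver_cores': 2, 'spark_local_dir': '/var'})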
