Merge lp:~aisrael/charms/trusty/apache-hadoop-client/benchmarks into lp:~bigdata-dev/charms/trusty/apache-hadoop-client/trunk

Proposed by Adam Israel
Status: Merged
Merge reported by: Cory Johns
Merged at revision: not available
Proposed branch: lp:~aisrael/charms/trusty/apache-hadoop-client/benchmarks
Merge into: lp:~bigdata-dev/charms/trusty/apache-hadoop-client/trunk
Diff against target: 266 lines (+213/-0)
8 files modified
README.md (+44/-0)
actions.yaml (+38/-0)
actions/parseTerasort.py (+54/-0)
actions/teragen (+21/-0)
actions/terasort (+49/-0)
hooks/benchmark-relation-changed (+3/-0)
hooks/install (+2/-0)
metadata.yaml (+2/-0)
To merge this branch: bzr merge lp:~aisrael/charms/trusty/apache-hadoop-client/benchmarks
Reviewer: Juju Big Data Development (review pending)
Review via email: mp+260526@code.launchpad.net

Description of the change

This merge proposal adds support for benchmarking and implements a 'terasort' benchmark. It introduces two external dependencies: python-pip (which may already be installed via other requirements) and charm-benchmark, which is installed via pip.
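For example, the terasort parameters defined in actions.yaml (see the diff below) can be overridden when the action is invoked; a quick sketch, using the unit name from the README:

    $ juju action do apache-hadoop-client/0 terasort maps=4 reduces=4
    $ juju action fetch --wait 0 <action-id>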

80. By Adam Israel

Add a Benchmarking section to the README

Revision history for this message
Cory Johns (johnsca) wrote :

Awesome! Thanks for this. See my two inline comments, though, regarding the /etc/environment issues you ran into.
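A minimal sketch of one possible alternative, assuming JAVA_HOME and HADOOP_HOME are the only variables the scripts need from /etc/environment:

    # Source once in the root shell, then pass the expanded values explicitly
    # to the ubuntu shell rather than re-sourcing inside the heredoc.
    . /etc/environment
    su ubuntu -c "JAVA_HOME=$JAVA_HOME hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples*.jar teragen 10000000 tera_demo_in"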

Revision history for this message
Cory Johns (johnsca) wrote :

Merged this, with some modifications, into ~bigdata-dev.

Revision history for this message
Cory Johns (johnsca) wrote :

I should have clarified. We decided it made more sense to apply this to the apache-hadoop-plugin charm rather than -client, since the plugin charm now serves as the general connection point, a role previously filled by -client.
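Roughly, with -plugin as the connection point, the same benchmark would be driven like this (unit name illustrative, not taken from this proposal):

    $ juju action do apache-hadoop-plugin/0 terasort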

Preview Diff

=== modified file 'README.md'
--- README.md 2015-05-19 16:00:07 +0000
+++ README.md 2015-05-28 21:05:22 +0000
@@ -22,6 +22,50 @@
     juju ssh client/0
     hadoop jar my-job.jar
 
+## Benchmarking
+
+You can run a terasort benchmark to gauge the performance of your environment:
+
+    $ juju action do apache-hadoop-client/0 terasort
+    Action queued with id: cbd981e8-3400-4c8f-8df1-c39c55a7eae6
+    $ juju action fetch --wait 0 cbd981e8-3400-4c8f-8df1-c39c55a7eae6
+    results:
+      meta:
+        composite:
+          direction: asc
+          units: ms
+          value: "206676"
+      results:
+        raw: '{"Total vcore-seconds taken by all map tasks": "439783", "Spilled Records":
+          "30000000", "WRONG_LENGTH": "0", "Reduce output records": "10000000", "HDFS:
+          Number of bytes read": "1000001024", "Total vcore-seconds taken by all reduce
+          tasks": "50275", "Reduce input groups": "10000000", "Shuffled Maps ": "8", "FILE:
+          Number of bytes written": "3128977482", "Input split bytes": "1024", "Total
+          time spent by all reduce tasks (ms)": "50275", "FILE: Number of large read operations":
+          "0", "Bytes Read": "1000000000", "Virtual memory (bytes) snapshot": "7688794112",
+          "Launched map tasks": "8", "GC time elapsed (ms)": "11656", "Bytes Written":
+          "1000000000", "FILE: Number of read operations": "0", "HDFS: Number of write
+          operations": "2", "Total megabyte-seconds taken by all reduce tasks": "51481600",
+          "Combine output records": "0", "HDFS: Number of bytes written": "1000000000",
+          "Total time spent by all map tasks (ms)": "439783", "Map output records": "10000000",
+          "Physical memory (bytes) snapshot": "2329722880", "FILE: Number of write operations":
+          "0", "Launched reduce tasks": "1", "Reduce input records": "10000000", "Total
+          megabyte-seconds taken by all map tasks": "450337792", "WRONG_REDUCE": "0",
+          "HDFS: Number of read operations": "27", "Reduce shuffle bytes": "1040000048",
+          "Map input records": "10000000", "Map output materialized bytes": "1040000048",
+          "CPU time spent (ms)": "195020", "Merged Map outputs": "8", "FILE: Number of
+          bytes read": "2080000144", "Failed Shuffles": "0", "Total time spent by all
+          maps in occupied slots (ms)": "439783", "WRONG_MAP": "0", "BAD_ID": "0", "Rack-local
+          map tasks": "2", "IO_ERROR": "0", "Combine input records": "0", "Map output
+          bytes": "1020000000", "CONNECTION": "0", "HDFS: Number of large read operations":
+          "0", "Total committed heap usage (bytes)": "1755840512", "Data-local map tasks":
+          "6", "Total time spent by all reduces in occupied slots (ms)": "50275"}'
+    status: completed
+    timing:
+      completed: 2015-05-28 20:55:50 +0000 UTC
+      enqueued: 2015-05-28 20:53:41 +0000 UTC
+      started: 2015-05-28 20:53:44 +0000 UTC
+
 
 ## Contact Information
 
=== added directory 'actions'
=== added file 'actions.yaml'
--- actions.yaml 1970-01-01 00:00:00 +0000
+++ actions.yaml 2015-05-28 21:05:22 +0000
@@ -0,0 +1,38 @@
+teragen:
+  description: Generate sample data with teragen for the terasort benchmark.
+  params:
+    size:
+      description: The number of 100-byte rows to generate; the default produces roughly 1GB of data to generate and sort.
+      type: string
+      default: "10000000"
+    indir:
+      description: The HDFS directory in which to store the generated data.
+      type: string
+      default: 'tera_demo_in'
+terasort:
+  description: Run the terasort benchmark, generating input data first if needed.
+  params:
+    indir:
+      description: The HDFS directory containing the input data to sort.
+      type: string
+      default: 'tera_demo_in'
+    outdir:
+      description: The HDFS directory in which to store the sorted output.
+      type: string
+      default: 'tera_demo_out'
+    size:
+      description: The number of 100-byte rows to generate; the default produces roughly 1GB of data to generate and sort.
+      type: string
+      default: "10000000"
+    maps:
+      description: The default number of map tasks per job. 1-20.
+      type: integer
+      default: 1
+    reduces:
+      description: The default number of reduce tasks per job. Typically set to 99% of the cluster's reduce capacity, so that if a node fails the reduces can still be executed in a single wave. Try 1-20.
+      type: integer
+      default: 1
+    numtasks:
+      description: How many tasks to run per JVM. If set to -1, there is no limit.
+      type: integer
+      default: 1
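With a juju 1.x client, the action schema above can be sanity-checked once the charm is deployed, e.g.:

    $ juju action defined apache-hadoop-client --schema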
=== added file 'actions/parseTerasort.py'
--- actions/parseTerasort.py 1970-01-01 00:00:00 +0000
+++ actions/parseTerasort.py 2015-05-28 21:05:22 +0000
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+"""
+Simple script to parse terasort's counter output
+and reformat it as JSON for sending back to juju
+"""
+import sys
+import subprocess
+import json
+from charmhelpers.contrib.benchmark import Benchmark
+import re
+
+
+def action_set(key, val):
+    """Set an action result, flattening nested dicts into dotted keys."""
+    action_cmd = ['action-set']
+    if isinstance(val, dict):
+        for k, v in val.iteritems():
+            action_set('%s.%s' % (key, k), v)
+        return
+
+    action_cmd.append('%s=%s' % (key, val))
+    subprocess.check_call(action_cmd)
+
+
+def parse_terasort_output():
+    """
+    Parse the output from terasort and set the action results.
+    """
+    results = {}
+
+    # Find all of the interesting things (Hadoop's tab-indented Key=Value counters)
+    regex = re.compile(r'\t+(.*)=(.*)')
+    for line in sys.stdin.readlines():
+        m = regex.match(line)
+        if m:
+            results[m.group(1)] = m.group(2)
+    action_set("results.raw", json.dumps(results))
+
+    # Calculate what's important: total CPU plus GC time, lower is better
+    if 'CPU time spent (ms)' in results:
+        composite = int(results['CPU time spent (ms)']) + int(results['GC time elapsed (ms)'])
+        Benchmark.set_composite_score(
+            composite,
+            'ms',
+            'asc'
+        )
+    else:
+        print "Invalid test results"
+        print results
+
+
+if __name__ == "__main__":
+    parse_terasort_output()
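For context, the regex in parseTerasort.py targets the tab-indented Key=Value counters Hadoop prints at the end of a job run, for example (values from the sample run in the README above):

    	CPU time spent (ms)=195020
    	GC time elapsed (ms)=11656

The composite score is the sum of those two counters: 195020 + 11656 = 206676 ms, matching the composite value shown in the README.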
=== added file 'actions/teragen'
--- actions/teragen 1970-01-01 00:00:00 +0000
+++ actions/teragen 2015-05-28 21:05:22 +0000
@@ -0,0 +1,21 @@
+#!/bin/bash
+set -eux
+SIZE=`action-get size`
+IN_DIR=`action-get indir`
+
+benchmark-start
+
+# I don't know why, but have to source /etc/environment before and after
+# invoking the bash shell to get it working.
+. /etc/environment
+su ubuntu << EOF
+. /etc/environment
+if JAVA_HOME=${JAVA_HOME} hadoop fs -stat ${IN_DIR}; then
+    JAVA_HOME=${JAVA_HOME} hadoop fs -rm -r -skipTrash ${IN_DIR} || true
+fi
+
+JAVA_HOME=${JAVA_HOME} hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples*.jar teragen ${SIZE} ${IN_DIR}
+
+EOF
+
+benchmark-finish
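teragen can also be run on its own to pre-populate input data at a non-default size, e.g. (50,000,000 rows of 100 bytes is roughly 5GB):

    $ juju action do apache-hadoop-client/0 teragen size=50000000 indir=tera_demo_in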
=== added file 'actions/terasort'
--- actions/terasort 1970-01-01 00:00:00 +0000
+++ actions/terasort 2015-05-28 21:05:22 +0000
@@ -0,0 +1,49 @@
+#!/bin/bash
+IN_DIR=`action-get indir`
+OUT_DIR=`action-get outdir`
+SIZE=`action-get size`
+OPTIONS=''
+
+MAPS=`action-get maps`
+REDUCES=`action-get reduces`
+NUMTASKS=`action-get numtasks`
+
+OPTIONS="${OPTIONS} -D mapreduce.job.maps=${MAPS}"
+OPTIONS="${OPTIONS} -D mapreduce.job.reduces=${REDUCES}"
+OPTIONS="${OPTIONS} -D mapreduce.job.jvm.numtasks=${NUMTASKS}"
+
+mkdir -p /opt/terasort
+chown ubuntu:ubuntu /opt/terasort
+run=`date +%s`
+
+# HACK: the environment reset below is munging the PATH
+OLDPATH=$PATH
+
+
+# I don't know why, but have to source /etc/environment before and after
+# invoking the bash shell to get it working.
+. /etc/environment
+su ubuntu << EOF
+. /etc/environment
+
+mkdir -p /opt/terasort/results/$run
+
+# If there's no data generated yet, create it using the action defaults
+if ! JAVA_HOME=${JAVA_HOME} hadoop fs -stat ${IN_DIR} &> /dev/null; then
+    JAVA_HOME=${JAVA_HOME} hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples*.jar teragen ${SIZE} ${IN_DIR} > /dev/null
+
+fi
+
+# If there's already sorted data, remove it
+if JAVA_HOME=${JAVA_HOME} hadoop fs -stat ${OUT_DIR} &> /dev/null; then
+    JAVA_HOME=${JAVA_HOME} hadoop fs -rm -r -skipTrash ${OUT_DIR} || true
+fi
+
+benchmark-start
+JAVA_HOME=${JAVA_HOME} hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples*.jar terasort ${OPTIONS} ${IN_DIR} ${OUT_DIR} &> /opt/terasort/results/$run/terasort.log
+benchmark-finish
+
+EOF
+PATH=$OLDPATH
+
+cat /opt/terasort/results/$run/terasort.log | python $CHARM_DIR/actions/parseTerasort.py
=== added file 'hooks/benchmark-relation-changed'
--- hooks/benchmark-relation-changed 1970-01-01 00:00:00 +0000
+++ hooks/benchmark-relation-changed 2015-05-28 21:05:22 +0000
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+relation-set benchmarks=terasort
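This advertises the charm's available benchmarks to the service on the other end of the benchmark relation. A sketch of how a hypothetical consumer charm might read it in its own benchmark-relation-changed hook:

    #!/bin/bash
    # Inside a relation hook, relation-get defaults to the remote unit
    BENCHMARKS=$(relation-get benchmarks)
    juju-log "remote unit advertises benchmarks: ${BENCHMARKS}"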
=== modified file 'hooks/install'
--- hooks/install 2015-05-11 22:25:12 +0000
+++ hooks/install 2015-05-28 21:05:22 +0000
@@ -1,2 +1,4 @@
 #!/bin/bash
+apt-get install -y python-pip && pip install -U charm-benchmark
+
 hooks/status-set blocked "Please add relation to apache-hadoop-plugin"
=== added symlink 'hooks/upgrade-charm'
=== target is u'install'
=== modified file 'metadata.yaml'
--- metadata.yaml 2015-05-12 22:18:09 +0000
+++ metadata.yaml 2015-05-28 21:05:22 +0000
@@ -12,3 +12,5 @@
   hadoop-plugin:
     interface: hadoop-plugin
     scope: container
+  benchmark:
+    interface: benchmark
