Merge lp:~jjo/charms/precise/cassandra/merge-reworked-leader-election into lp:charms/cassandra
- Precise Pangolin (12.04)
- merge-reworked-leader-election
- Merge into trunk
Status: | Merged |
---|---|
Merged at revision: | 33 |
Proposed branch: | lp:~jjo/charms/precise/cassandra/merge-reworked-leader-election |
Merge into: | lp:charms/cassandra |
Diff against target: |
433 lines (+202/-66) 2 files modified
hooks/cassandra-common (+191/-61) tests/test-cassandra-conf.sh (+11/-5) |
To merge this branch: | bzr merge lp:~jjo/charms/precise/cassandra/merge-reworked-leader-election |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Charles Butler (community) | Approve | ||
Review via email: mp+220125@code.launchpad.net |
Commit message
Description of the change
James Troup (elmo) wrote : | # |
JuanJo Ciarlante (jjo) wrote : | # |
> This charm desperately needs to be rewritten in anything other than bash.
+1K
Charles Butler (lazypower) wrote : | # |
While I agree with the statement the leader re-election seems to work much better than what exists now in the store.
It's not perfect — it works ~70% of the time. I'm going to ack this considering I've worked extensively with this particular merging codebase with greater success than what exists in the store.
If you find problems - you can find me in #juju as lazypower and I'll be happy to help you attempt to resolve any cluster-ring setup issues that may arise.
Thanks for this JuanJo! I certainly appreciate the time that went into getting this backported into the charm store charm.
If you have any questions/
JuanJo Ciarlante (jjo) wrote : | # |
> It's not perfect — it works ~70% of the time. I'm going to ack this considering I've worked extensively with this particular merging codebase with greater success than what exists in the store.
Can't agree more - this is a workaround for juju not providing leader election facilities, see also lp#1258485.
Thanks for the review and merge.
--J
Preview Diff
1 | === modified file 'hooks/cassandra-common' |
2 | --- hooks/cassandra-common 2013-11-22 16:43:13 +0000 |
3 | +++ hooks/cassandra-common 2014-05-19 20:14:21 +0000 |
4 | @@ -1,5 +1,5 @@ |
5 | #!/bin/bash |
6 | -# vim: set tabstop=2 expandtab: |
7 | +# vim: set ts=4 sw=4 et si: |
8 | |
9 | . ./scripts/volume-common.sh |
10 | ETC_CASSANDRA="${ETC_CASSANDRA:-/etc/cassandra}" |
11 | @@ -11,6 +11,7 @@ |
12 | CASSANDRA_TOPOLOGY="${ETC_CASSANDRA}/cassandra-topology.properties" |
13 | CASSANDRA_USER="cassandra" |
14 | CASSANDRA_GROUP="cassandra" |
15 | +NAGIOS_EXPORT_DIR="/var/lib/nagios/export" |
16 | |
17 | set -e |
18 | |
19 | @@ -19,9 +20,18 @@ |
20 | # Comms-less leader determination |
21 | # Picks the unit with the lowest id. |
22 | am_i_the_leader () { |
23 | + # FWIW this is race-prone (as not all units have the same 'relation-list' |
24 | + # view at the same point in time), so am_i_the_leader it's kinda ~best-effort. |
25 | + # Until juju-core provides facilities for master election, keep this |
26 | + # nasty workaround. |
27 | # Leader is the lowest unit number and won't be in the |
28 | # list of relation peers |
29 | + if [ -z "${JUJU_RELATION_ID}" ]; then |
30 | + juju-log "${FUNCNAME[1]}: ${FUNCNAME[0]}: not running under relation context" |
31 | + return 2 |
32 | + fi |
33 | local units=$(relation-list) |
34 | + juju-log "related units seen: ${units}" |
35 | local unit_no=${JUJU_UNIT_NAME##*/} |
36 | local unit |
37 | for unit in ${units}; do |
38 | @@ -52,6 +62,15 @@ |
39 | mv "$from" "$from.migrated_${JUJU_UNIT_NAME//\//-}@$(date +%s)" |
40 | } |
41 | |
42 | +# When the volume is mounted by another (fresh) unit, getent passwd |
43 | +# entry *can* get a different UID (depending e.g. on user order |
44 | +# creation and/or other role accounts present), thus the need |
45 | +# to fix permissions (ownership) over cassandra data dir. |
46 | +fix_cassandra_perms() { |
47 | + local data_dir=${1?} |
48 | + chown -R cassandra:cassandra ${data_dir} |
49 | +} |
50 | + |
51 | # Install cassandra source and install packages |
52 | install_cassandra () { |
53 | juju-log "Installing Cassandra" |
54 | @@ -91,35 +110,78 @@ |
55 | |
56 | get_private_ip () { |
57 | # Make sure that we use the resolvable private address |
58 | - # dig deals with both hostnames and ip addresses in a nice |
59 | - # way - this has been tested in local and ec2 providers |
60 | - # with ec2 and openstack |
61 | - dig +short `unit-get private-address` |
62 | -} |
63 | - |
64 | -get_seed_nodes () { |
65 | - FORCE_SEED_NODES=$(config-get force-seed-nodes) |
66 | - if [[ -n $FORCE_SEED_NODES ]]; then |
67 | - echo $FORCE_SEED_NODES |
68 | - else |
69 | - relation-get seed-nodes | sed -e 's!\s\+!,!g' |
70 | - fi |
71 | -} |
72 | - |
73 | -set_seed_nodes () { |
74 | - FORCE_SEED_NODES=$(config-get force-seed-nodes) |
75 | - if [[ -n $FORCE_SEED_NODES ]]; then |
76 | - juju-log "Setting seed_node to $FORCE_SEED_NODES" |
77 | - relation-set seed-nodes="$FORCE_SEED_NODES" |
78 | - else |
79 | - juju-log "Setting seed_node to $(running_nodes)" |
80 | - relation-set seed-nodes="$(running_nodes)" |
81 | - fi |
82 | + # some dig versions dealxs with both hostnames and ip |
83 | + # addresses, but don't depend on that. |
84 | + local address=$(unit-get private-address) |
85 | + if [[ ${address} =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]]; then |
86 | + # already an IP |
87 | + echo "${address}" |
88 | + else |
89 | + # not an IP (some providers return hostnames), resolve it |
90 | + dig +short "${address}" |
91 | + fi |
92 | +} |
93 | + |
94 | +relation-get-all() { |
95 | + # reltype is e.g. 'cluster' |
96 | + # relkey is e.g. 'seed-nodes' |
97 | + # default separator is space |
98 | + local reltype=${1:?} relkey=${2:?} separator="${3:- }" |
99 | + local rid unit value= |
100 | + # aggregate all values from every related unit |
101 | + value=$( |
102 | + for rid in $(relation-ids ${reltype});do |
103 | + for unit in $(relation-list -r ${rid});do |
104 | + relation-get -r ${rid} ${relkey} ${unit} |
105 | + done |
106 | + done |
107 | + ) |
108 | + echo "${value}" | sort -u | paste -d "${separator}" -s |
109 | +} |
110 | + |
111 | +write_seed_nodes() { |
112 | + local seeds="${1?}" |
113 | + # write to config if non-null seeds, or seed value coming from |
114 | + if [ -n "${seeds}" ] || [ "$(config-get force-seed-nodes)" != "" ] || \ |
115 | + [ "$(config-get allow-single-node)" = "True" ]; then |
116 | + local seeds_comma=$(echo "$seeds" | sed -r 's/ +/,/g') |
117 | + juju-log "Setting seeds node to be '${seeds_comma}'" |
118 | + sed -i -e "s/\- seeds:.*/\- seeds: \"${seeds_comma}\"/" ${CASSANDRA_YML} |
119 | + else |
120 | + # This happens when e.g. the unit relates to a non-leader |
121 | + juju-log "Skipping seeds node setting (no seeds)" |
122 | + fi |
123 | +} |
124 | + |
125 | +get_seed_nodes() { |
126 | + local force_seed_nodes="$(config-get force-seed-nodes)" |
127 | + local allow_single_node="$(config-get allow-single-node)" |
128 | + local seeds |
129 | + |
130 | + if [ -n "${force_seed_nodes}" ]; then |
131 | + seeds="${force_seed_nodes}" |
132 | + elif [ "${allow_single_node}" == "True" ]; then |
133 | + seeds="$(get_private_ip)" |
134 | + # If running under relation context ... |
135 | + elif [ -n "${JUJU_RELATION_ID}" ]; then |
136 | + if am_i_the_leader; then |
137 | + seeds="$(running_nodes)" |
138 | + # Too early, maybe bootstraping the whole cluster ... |
139 | + if [ -z "${seeds}" ]; then |
140 | + seeds=$(get_private_ip) |
141 | + fi |
142 | + else |
143 | + seeds=$(relation-get-all cluster seed-nodes) |
144 | + fi |
145 | + fi |
146 | + juju-log "${FUNCNAME[1]}: ${FUNCNAME[0]}: seeds='$seeds'" |
147 | + echo "${seeds}" |
148 | } |
149 | |
150 | srv_root_get() { |
151 | sed -r -n 's,commitlog_directory: *([^ ]+)/commitlog.*,\1,p' ${CASSANDRA_YML} |
152 | } |
153 | + |
154 | srv_root_from_volid() { |
155 | local volid=${1?missing volid} |
156 | local mntpoint |
157 | @@ -193,7 +255,6 @@ |
158 | local COMPACTION_THROUGHPUT=$(config-get compaction-throughput) |
159 | local STREAM_THROUGHPUT=$(config-get stream-throughput) |
160 | local NUM_TOKENS=$(config-get num-tokens) |
161 | - local SEED_ADDRESSES=$(get_seed_nodes) |
162 | local INITIAL_TOKEN=$(get_initial_token) |
163 | local PARTITIONER=$(config-get partitioner) |
164 | local ENDPOINT_SNITCH=$(config-get endpoint_snitch) |
165 | @@ -206,7 +267,6 @@ |
166 | # specify this as a configuration parameter |
167 | # non-trivial sed to edit data_file_directories (has its value in the line following it) |
168 | sed -i -r -e "s/^cluster_name:.*/cluster_name: \'${CLUSTER_NAME}\'/" \ |
169 | - -e "s/(- seeds):.*/\1: \"${SEED_ADDRESSES}\"/" \ |
170 | -e "s/^(storage_port):.*/\1: ${CLUSTER_PORT}/" \ |
171 | -e "s/^(listen_address):.*/\1: ${LISTEN_ADDRESS}/" \ |
172 | -e "s/^(rpc_address):.*/\1: ${LISTEN_ADDRESS}/" \ |
173 | @@ -273,6 +333,7 @@ |
174 | juju-log "WARNING: found already existing cassandra data, migrating ${srv_root_curr} -> ${srv_root_new}" |
175 | migrate_cassandra_data ${srv_root_curr} ${srv_root_new} |
176 | fi |
177 | + fix_cassandra_perms ${srv_root_new} |
178 | srv_root_save ${srv_root_new} |
179 | fi |
180 | # Set the kernel block scheduler |
181 | @@ -320,6 +381,7 @@ |
182 | else |
183 | del_JVM_OPTS_line "JUJU_extra-jvm-opts" |
184 | fi |
185 | + reconfigure_cluster_seeds |
186 | } |
187 | |
188 | # Remove JVM_OPTS line by regex |
189 | @@ -346,19 +408,60 @@ |
190 | } |
191 | |
192 | # Service Control Commands |
193 | +cassandra_is_running() { |
194 | + juju-log "${FUNCNAME[1]}: ${FUNCNAME[0]} $@" |
195 | + service cassandra status |
196 | +} |
197 | + |
198 | +# wait until "$func $*" OK, loop $times sleeping $secs (default: 1) |
199 | +wait_until() { |
200 | + local secs=${1:?} times=${2:?} func=${3:?} |
201 | + shift 3 |
202 | + juju-log "${FUNCNAME[1]}: ${FUNCNAME[0]} $@" |
203 | + while let times=times-1; do |
204 | + if ${func} ${*}; then |
205 | + juju-log "${FUNCNAME[0]} $func $*: OK (times=$times left)" |
206 | + return 0 |
207 | + fi |
208 | + juju-log "${FUNCNAME[0]} $func $*: sleep ${secs}, times=$times left" |
209 | + sleep ${secs} |
210 | + done |
211 | + return 1 |
212 | +} |
213 | + |
214 | +# ditto wait_until, with opposite logic |
215 | +wait_until_not() { |
216 | + local secs=${1:?} times=${2:?} func=${3:?} |
217 | + juju-log "${FUNCNAME[1]}: ${FUNCNAME[0]} $@" |
218 | + while let times=times-1; do |
219 | + if ${func} ${*}; then |
220 | + : |
221 | + else |
222 | + juju-log "${FUNCNAME[0]} $func $*: OK (times=$times left)" |
223 | + return 0 |
224 | + fi |
225 | + juju-log "${FUNCNAME[0]} $func $*: sleep ${secs}, times=$times left" |
226 | + sleep ${secs} |
227 | + done |
228 | + return 1 |
229 | +} |
230 | + |
231 | restart_cassandra () { |
232 | juju-log "Restarting Cassandra" |
233 | - service cassandra status && service cassandra restart || : |
234 | + service cassandra restart |
235 | + wait_until 1 60 cassandra_is_running |
236 | } |
237 | |
238 | start_cassandra() { |
239 | juju-log "Starting Cassandra" |
240 | service cassandra status || service cassandra start |
241 | + wait_until 1 60 cassandra_is_running |
242 | } |
243 | |
244 | stop_cassandra() { |
245 | juju-log "Stopping Cassandra" |
246 | service cassandra stop || : |
247 | + wait_until_not 1 60 cassandra_is_running |
248 | } |
249 | |
250 | setup_database_interface () { |
251 | @@ -371,19 +474,23 @@ |
252 | relation-set port="$(config-get jmx-port)" |
253 | } |
254 | |
255 | -reconfigure_cluster_seeds () { |
256 | - juju-log "Reconfiguring cluster seeds" |
257 | - seeds=$(get_seed_nodes) |
258 | - IP=`get_private_ip` |
259 | - if [ -z "${seeds}" ] && am_i_the_leader; then |
260 | - juju-log "Configuring myself locally as a seed node" |
261 | - sed -i -e "s/\- seeds:.*/\- seeds: \"${IP}\"/" ${CASSANDRA_YML} |
262 | - else |
263 | - juju-log "Setting seeds node to be ${seeds}" |
264 | - sed -i -e "s/\- seeds:.*/\- seeds: \"${seeds}\"/" ${CASSANDRA_YML} |
265 | - fi |
266 | - juju-log "$JUJU_REMOTE_UNIT modified its settings or departed" |
267 | - bzr_ci || : |
268 | +reconfigure_cluster_seeds() { |
269 | + juju-log "Reconfiguring cluster seeds" |
270 | + local seeds=$(get_seed_nodes) |
271 | + write_seed_nodes "${seeds}" |
272 | + # If under relation context, and I'm leader => propagate seed-nodes to other units. |
273 | + if [ -n "${seeds}" -a -n "${JUJU_RELATION_ID}" ] && am_i_the_leader; then |
274 | + juju-log "Setting relation seed-nodes to ${seeds}" |
275 | + relation-set seed-nodes="${seeds}" |
276 | + fi |
277 | +} |
278 | + |
279 | +nodestatus_is() { |
280 | + local ip="${1:?}" wanted_status="${2:?}" |
281 | + local nodestatus="$(nodestatus ${ip})" |
282 | + juju-log "nodestatus='${nodestatus}' wanted_status='${wanted_status}'" |
283 | + [[ ${nodestatus} =~ ${wanted_status} ]] |
284 | + return $? |
285 | } |
286 | |
287 | nodestatus() { |
288 | @@ -410,31 +517,43 @@ |
289 | paste -d " " -s |
290 | } |
291 | |
292 | +echo_yml_expression() { |
293 | + python -c "import yaml,sys;print yaml.load(sys.stdin)$1" < ${CASSANDRA_YML} |
294 | +} |
295 | + |
296 | +current_file_seeds() { |
297 | + echo_yml_expression '["seed_provider"][0]["parameters"][0]["seeds"]' |
298 | +} |
299 | + |
300 | bootstrap () { |
301 | - service cassandra status && return 0 |
302 | - seeds=$(get_seed_nodes) |
303 | - if [ -n "${seeds}" ] || am_i_the_leader; then |
304 | - start_cassandra |
305 | + local IP |
306 | + local count status |
307 | + if am_i_the_leader; then |
308 | + : |
309 | + else |
310 | + local file_seeds=$(current_file_seeds) |
311 | + case "${file_seeds}" in |
312 | + ""|127.*) |
313 | + juju-log "Returning soft-fail on not-yet received external seeds" |
314 | + return 0;; |
315 | + esac |
316 | + fi |
317 | juju-log "Waiting for this node to join the cluster" |
318 | - IP=`get_private_ip` |
319 | - count=0 |
320 | - status="$(nodestatus $IP)" |
321 | - while [ "${status}" != "UpNormal" ] && [ $count -lt 40 ]; do |
322 | - sleep 30 |
323 | - status="$(nodestatus $IP)" |
324 | - count=$((count + 1)) |
325 | - juju-log "Waited $((count * 30)) seconds so far...(nodestatus=${status})" |
326 | + # wait until cassandra is UP ok, 3 retries |
327 | + # start_cassandra will no-op if already running |
328 | + count=3 |
329 | + while let count=count-1; do |
330 | + start_cassandra |
331 | + if wait_until 60 2 nodestatus_is $(get_private_ip) "Up(Joining|Normal)"; then |
332 | + return 0 |
333 | + fi |
334 | done |
335 | - if [ $count -eq 40 ]; then |
336 | - juju-log "Timed out waiting for node" |
337 | - exit 2 |
338 | - fi |
339 | - set_seed_nodes |
340 | - fi |
341 | + juju-log "Timed out waiting for Up Normal/Joining" |
342 | + exit 2 |
343 | } |
344 | |
345 | remove_down_nodes () { |
346 | - set_seed_nodes |
347 | + reconfigure_cluster_seeds |
348 | } |
349 | |
350 | decommission_node () { |
351 | @@ -469,8 +588,11 @@ |
352 | export SERVICE_DESCRIPTION="Cassandra server heap usage" |
353 | export NRPE_CMD_NAME="check_${NAGIOS_CONTEXT}_cassandra_heap" |
354 | export NRPE_CMD="/usr/lib/nagios/plugins/check_cassandra_heap.sh ${IP} ${WARN_PCT} ${CRIT_PCT}" |
355 | + if [ ! -d "${NAGIOS_EXPORT_DIR}" ]; then |
356 | + mkdir -p "${NAGIOS_EXPORT_DIR}" |
357 | + fi |
358 | cheetah fill --env -p templates/nrpe_cmd_file.tmpl > /etc/nagios/nrpe.d/${NRPE_CMD_NAME}.cfg |
359 | - cheetah fill --env -p templates/nrpe_service_file.tmpl > /var/lib/nagios/export/service__${NAGIOS_HOSTNAME}_${NRPE_CMD_NAME}.cfg |
360 | + cheetah fill --env -p templates/nrpe_service_file.tmpl > ${NAGIOS_EXPORT_DIR}/service__${NAGIOS_HOSTNAME}_${NRPE_CMD_NAME}.cfg |
361 | fi |
362 | } |
363 | |
364 | @@ -524,6 +646,9 @@ |
365 | # Construct the cassandra.yaml file from the appropriate information above |
366 | configure_cassandra |
367 | bzr_ci && needs_restart=: |
368 | + # be sure to kick a cassandra restart if not |
369 | + # running yet |
370 | + cassandra_is_running || needs_restart=: |
371 | if [ $needs_restart ]; then |
372 | # Restart as required |
373 | restart_cassandra |
374 | @@ -548,13 +673,18 @@ |
375 | ;; |
376 | cluster-relation-joined) |
377 | juju-log "Joining cassandra cluster..." |
378 | + am_i_the_leader || sleep $(( 30 + RANDOM%15 )) |
379 | ;; |
380 | cluster-relation-changed) |
381 | reconfigure_cluster_seeds |
382 | + # if there were config changes, restart cassandra |
383 | + bzr_ci && restart_cassandra |
384 | bootstrap |
385 | ;; |
386 | cluster-relation-departed) |
387 | reconfigure_cluster_seeds |
388 | + # if there were config changes, restart cassandra |
389 | + bzr_ci && restart_cassandra |
390 | remove_down_nodes |
391 | ;; |
392 | jmx-relation-joined) |
393 | |
394 | === modified file 'tests/test-cassandra-conf.sh' |
395 | --- tests/test-cassandra-conf.sh 2013-11-21 01:00:58 +0000 |
396 | +++ tests/test-cassandra-conf.sh 2014-05-19 20:14:21 +0000 |
397 | @@ -122,10 +122,10 @@ |
398 | test_set_config force-seed-nodes "${value}" |
399 | hook_main config-changed 2>&1 || exit 1 |
400 | exp_ok test $(echo_yml_expression '["seed_provider"][0]["parameters"][0]["seeds"]') = "${value}" |
401 | - # and un-setting |
402 | + # and un-setting while bring back MY_IP |
403 | test_set_config force-seed-nodes "" |
404 | hook_main config-changed 2>&1 || exit 1 |
405 | - exp_ok test x$(echo_yml_expression '["seed_provider"][0]["parameters"][0]["seeds"]') = x |
406 | + exp_ok test $(echo_yml_expression '["seed_provider"][0]["parameters"][0]["seeds"]') = "$MY_IP" |
407 | } |
408 | test_units_to_update() { |
409 | # Change something |
410 | @@ -198,14 +198,20 @@ |
411 | mkdir -p ${ETC_CASSANDRA} |
412 | # Create fake environment (juju and other cmds) |
413 | declare -A CONFIG |
414 | -JUJU_UNIT_NAME="cassandra-test/0" |
415 | +MY_IP="127.0.0.99" |
416 | +export JUJU_UNIT_NAME="cassandra-test/0" |
417 | +export JUJU_RELATION_ID=foo:1 |
418 | open-port() { echo "DRY: open-port $@"; } |
419 | +service() { echo "DRY: service $@"; } |
420 | config-get() { eval echo "\${CONFIG[$1]}"; } |
421 | +running_nodes(){ echo "$MY_IP"; } |
422 | unit-get() { echo "$JUJU_UNIT_NAME"; } |
423 | -juju-log() { echo "$@" ;} |
424 | +juju-log() { echo "juju-log: $@" >&2 ;} |
425 | relation-get() { echo "" ;} |
426 | +relation-list(){ echo "" ;} |
427 | +relation-set() { : ;} |
428 | bzr() { echo "DRY: bzr $@"; } |
429 | -dig() { [[ ${FUNCNAME[1]} == get_private_ip ]] && echo "127.0.0.99" ;} |
430 | +dig() { [[ ${FUNCNAME[1]} == get_private_ip ]] && echo "$MY_IP" ;} |
431 | source_charm_code |
432 | CASSANDRA_USER=$(id -nu) |
433 | CASSANDRA_GROUP=$(id -ng) |
This charm desperately needs to be rewritten in anything other than bash.