Merge lp:~jjo/charms/precise/cassandra/merge-reworked-leader-election into lp:charms/cassandra

Proposed by JuanJo Ciarlante
Status: Merged
Merged at revision: 33
Proposed branch: lp:~jjo/charms/precise/cassandra/merge-reworked-leader-election
Merge into: lp:charms/cassandra
Diff against target: 433 lines (+202/-66)
2 files modified
hooks/cassandra-common (+191/-61)
tests/test-cassandra-conf.sh (+11/-5)
To merge this branch: bzr merge lp:~jjo/charms/precise/cassandra/merge-reworked-leader-election
Reviewer Review Type Date Requested Status
Charles Butler (community) Approve
Review via email: mp+220125@code.launchpad.net
To post a comment you must log in.
Revision history for this message
James Troup (elmo) wrote :

This charm desperately needs to be rewritten in anything other than bash.

Revision history for this message
JuanJo Ciarlante (jjo) wrote :

> This charm desperately needs to be rewritten in anything other than bash.

+1K

Revision history for this message
Charles Butler (lazypower) wrote :

While I agree with the statement the leader re-election seems to work much better than what exists now in the store.

Its not perfect, it works ~ 70% of the time. I'm going to ack this considering I've worked extensively with this particular merging codebase with greater success than what exists in the store.

If you find problems - you can find me in #juju as lazypower and I'll be happy to help you attempt to resolve any cluster-ring setup issues that may arise.

Thanks for this JuanJo! I certainly appreciate the time that went into getting this backported into the charm store charm.

If you have any questions/comments/concerns about the review contact us in #juju on irc.freenode.net or email the mailing list <email address hidden>

review: Approve
Revision history for this message
JuanJo Ciarlante (jjo) wrote :

> Its not perfect, it works ~ 70% of the time. I'm going to ack this considering I've worked extensively with this particular merging codebase with greater success than what exists in the store.

Can't agree more - this is a workaround for juju not providing leader election facilities, see also lp#1258485.

Thanks for the review and merge.
--J

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'hooks/cassandra-common'
2--- hooks/cassandra-common 2013-11-22 16:43:13 +0000
3+++ hooks/cassandra-common 2014-05-19 20:14:21 +0000
4@@ -1,5 +1,5 @@
5 #!/bin/bash
6-# vim: set tabstop=2 expandtab:
7+# vim: set ts=4 sw=4 et si:
8
9 . ./scripts/volume-common.sh
10 ETC_CASSANDRA="${ETC_CASSANDRA:-/etc/cassandra}"
11@@ -11,6 +11,7 @@
12 CASSANDRA_TOPOLOGY="${ETC_CASSANDRA}/cassandra-topology.properties"
13 CASSANDRA_USER="cassandra"
14 CASSANDRA_GROUP="cassandra"
15+NAGIOS_EXPORT_DIR="/var/lib/nagios/export"
16
17 set -e
18
19@@ -19,9 +20,18 @@
20 # Comms-less leader determination
21 # Picks the unit with the lowest id.
22 am_i_the_leader () {
23+ # FWIW this is race-prone (as not all units have the same 'relation-list'
24+ # view at the same point in time), so am_i_the_leader it's kinda ~best-effort.
25+ # Until juju-core provides facilities for master election, keep this
26+ # nasty workaround.
27 # Leader is the lowest unit number and won't be in the
28 # list of relation peers
29+ if [ -z "${JUJU_RELATION_ID}" ]; then
30+ juju-log "${FUNCNAME[1]}: ${FUNCNAME[0]}: not running under relation context"
31+ return 2
32+ fi
33 local units=$(relation-list)
34+ juju-log "related units seen: ${units}"
35 local unit_no=${JUJU_UNIT_NAME##*/}
36 local unit
37 for unit in ${units}; do
38@@ -52,6 +62,15 @@
39 mv "$from" "$from.migrated_${JUJU_UNIT_NAME//\//-}@$(date +%s)"
40 }
41
42+# When the volume is mounted by another (fresh) unit, getent passwd
43+# entry *can* get a different UID (depending e.g. on user order
44+# creation and/or other role accounts present), thus the need
45+# to fix permissions (ownership) over cassandra data dir.
46+fix_cassandra_perms() {
47+ local data_dir=${1?}
48+ chown -R cassandra:cassandra ${data_dir}
49+}
50+
51 # Install cassandra source and install packages
52 install_cassandra () {
53 juju-log "Installing Cassandra"
54@@ -91,35 +110,78 @@
55
56 get_private_ip () {
57 # Make sure that we use the resolvable private address
58- # dig deals with both hostnames and ip addresses in a nice
59- # way - this has been tested in local and ec2 providers
60- # with ec2 and openstack
61- dig +short `unit-get private-address`
62-}
63-
64-get_seed_nodes () {
65- FORCE_SEED_NODES=$(config-get force-seed-nodes)
66- if [[ -n $FORCE_SEED_NODES ]]; then
67- echo $FORCE_SEED_NODES
68- else
69- relation-get seed-nodes | sed -e 's!\s\+!,!g'
70- fi
71-}
72-
73-set_seed_nodes () {
74- FORCE_SEED_NODES=$(config-get force-seed-nodes)
75- if [[ -n $FORCE_SEED_NODES ]]; then
76- juju-log "Setting seed_node to $FORCE_SEED_NODES"
77- relation-set seed-nodes="$FORCE_SEED_NODES"
78- else
79- juju-log "Setting seed_node to $(running_nodes)"
80- relation-set seed-nodes="$(running_nodes)"
81- fi
82+ # some dig versions dealxs with both hostnames and ip
83+ # addresses, but don't depend on that.
84+ local address=$(unit-get private-address)
85+ if [[ ${address} =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]]; then
86+ # already an IP
87+ echo "${address}"
88+ else
89+ # not an IP (some providers return hostnames), resolve it
90+ dig +short "${address}"
91+ fi
92+}
93+
94+relation-get-all() {
95+ # reltype is e.g. 'cluster'
96+ # relkey is e.g. 'seed-nodes'
97+ # default separator is space
98+ local reltype=${1:?} relkey=${2:?} separator="${3:- }"
99+ local rid unit value=
100+ # aggregate all values from every related unit
101+ value=$(
102+ for rid in $(relation-ids ${reltype});do
103+ for unit in $(relation-list -r ${rid});do
104+ relation-get -r ${rid} ${relkey} ${unit}
105+ done
106+ done
107+ )
108+ echo "${value}" | sort -u | paste -d "${separator}" -s
109+}
110+
111+write_seed_nodes() {
112+ local seeds="${1?}"
113+ # write to config if non-null seeds, or seed value coming from
114+ if [ -n "${seeds}" ] || [ "$(config-get force-seed-nodes)" != "" ] || \
115+ [ "$(config-get allow-single-node)" = "True" ]; then
116+ local seeds_comma=$(echo "$seeds" | sed -r 's/ +/,/g')
117+ juju-log "Setting seeds node to be '${seeds_comma}'"
118+ sed -i -e "s/\- seeds:.*/\- seeds: \"${seeds_comma}\"/" ${CASSANDRA_YML}
119+ else
120+ # This happens when e.g. the unit relates to a non-leader
121+ juju-log "Skipping seeds node setting (no seeds)"
122+ fi
123+}
124+
125+get_seed_nodes() {
126+ local force_seed_nodes="$(config-get force-seed-nodes)"
127+ local allow_single_node="$(config-get allow-single-node)"
128+ local seeds
129+
130+ if [ -n "${force_seed_nodes}" ]; then
131+ seeds="${force_seed_nodes}"
132+ elif [ "${allow_single_node}" == "True" ]; then
133+ seeds="$(get_private_ip)"
134+ # If running under relation context ...
135+ elif [ -n "${JUJU_RELATION_ID}" ]; then
136+ if am_i_the_leader; then
137+ seeds="$(running_nodes)"
138+ # Too early, maybe bootstraping the whole cluster ...
139+ if [ -z "${seeds}" ]; then
140+ seeds=$(get_private_ip)
141+ fi
142+ else
143+ seeds=$(relation-get-all cluster seed-nodes)
144+ fi
145+ fi
146+ juju-log "${FUNCNAME[1]}: ${FUNCNAME[0]}: seeds='$seeds'"
147+ echo "${seeds}"
148 }
149
150 srv_root_get() {
151 sed -r -n 's,commitlog_directory: *([^ ]+)/commitlog.*,\1,p' ${CASSANDRA_YML}
152 }
153+
154 srv_root_from_volid() {
155 local volid=${1?missing volid}
156 local mntpoint
157@@ -193,7 +255,6 @@
158 local COMPACTION_THROUGHPUT=$(config-get compaction-throughput)
159 local STREAM_THROUGHPUT=$(config-get stream-throughput)
160 local NUM_TOKENS=$(config-get num-tokens)
161- local SEED_ADDRESSES=$(get_seed_nodes)
162 local INITIAL_TOKEN=$(get_initial_token)
163 local PARTITIONER=$(config-get partitioner)
164 local ENDPOINT_SNITCH=$(config-get endpoint_snitch)
165@@ -206,7 +267,6 @@
166 # specify this as a configuration parameter
167 # non-trivial sed to edit data_file_directories (has its value in the line following it)
168 sed -i -r -e "s/^cluster_name:.*/cluster_name: \'${CLUSTER_NAME}\'/" \
169- -e "s/(- seeds):.*/\1: \"${SEED_ADDRESSES}\"/" \
170 -e "s/^(storage_port):.*/\1: ${CLUSTER_PORT}/" \
171 -e "s/^(listen_address):.*/\1: ${LISTEN_ADDRESS}/" \
172 -e "s/^(rpc_address):.*/\1: ${LISTEN_ADDRESS}/" \
173@@ -273,6 +333,7 @@
174 juju-log "WARNING: found already existing cassandra data, migrating ${srv_root_curr} -> ${srv_root_new}"
175 migrate_cassandra_data ${srv_root_curr} ${srv_root_new}
176 fi
177+ fix_cassandra_perms ${srv_root_new}
178 srv_root_save ${srv_root_new}
179 fi
180 # Set the kernel block scheduler
181@@ -320,6 +381,7 @@
182 else
183 del_JVM_OPTS_line "JUJU_extra-jvm-opts"
184 fi
185+ reconfigure_cluster_seeds
186 }
187
188 # Remove JVM_OPTS line by regex
189@@ -346,19 +408,60 @@
190 }
191
192 # Service Control Commands
193+cassandra_is_running() {
194+ juju-log "${FUNCNAME[1]}: ${FUNCNAME[0]} $@"
195+ service cassandra status
196+}
197+
198+# wait until "$func $*" OK, loop $times sleeping $secs (default: 1)
199+wait_until() {
200+ local secs=${1:?} times=${2:?} func=${3:?}
201+ shift 3
202+ juju-log "${FUNCNAME[1]}: ${FUNCNAME[0]} $@"
203+ while let times=times-1; do
204+ if ${func} ${*}; then
205+ juju-log "${FUNCNAME[0]} $func $*: OK (times=$times left)"
206+ return 0
207+ fi
208+ juju-log "${FUNCNAME[0]} $func $*: sleep ${secs}, times=$times left"
209+ sleep ${secs}
210+ done
211+ return 1
212+}
213+
214+# ditto wait_until, with opposite logic
215+wait_until_not() {
216+ local secs=${1:?} times=${2:?} func=${3:?}
217+ juju-log "${FUNCNAME[1]}: ${FUNCNAME[0]} $@"
218+ while let times=times-1; do
219+ if ${func} ${*}; then
220+ :
221+ else
222+ juju-log "${FUNCNAME[0]} $func $*: OK (times=$times left)"
223+ return 0
224+ fi
225+ juju-log "${FUNCNAME[0]} $func $*: sleep ${secs}, times=$times left"
226+ sleep ${secs}
227+ done
228+ return 1
229+}
230+
231 restart_cassandra () {
232 juju-log "Restarting Cassandra"
233- service cassandra status && service cassandra restart || :
234+ service cassandra restart
235+ wait_until 1 60 cassandra_is_running
236 }
237
238 start_cassandra() {
239 juju-log "Starting Cassandra"
240 service cassandra status || service cassandra start
241+ wait_until 1 60 cassandra_is_running
242 }
243
244 stop_cassandra() {
245 juju-log "Stopping Cassandra"
246 service cassandra stop || :
247+ wait_until_not 1 60 cassandra_is_running
248 }
249
250 setup_database_interface () {
251@@ -371,19 +474,23 @@
252 relation-set port="$(config-get jmx-port)"
253 }
254
255-reconfigure_cluster_seeds () {
256- juju-log "Reconfiguring cluster seeds"
257- seeds=$(get_seed_nodes)
258- IP=`get_private_ip`
259- if [ -z "${seeds}" ] && am_i_the_leader; then
260- juju-log "Configuring myself locally as a seed node"
261- sed -i -e "s/\- seeds:.*/\- seeds: \"${IP}\"/" ${CASSANDRA_YML}
262- else
263- juju-log "Setting seeds node to be ${seeds}"
264- sed -i -e "s/\- seeds:.*/\- seeds: \"${seeds}\"/" ${CASSANDRA_YML}
265- fi
266- juju-log "$JUJU_REMOTE_UNIT modified its settings or departed"
267- bzr_ci || :
268+reconfigure_cluster_seeds() {
269+ juju-log "Reconfiguring cluster seeds"
270+ local seeds=$(get_seed_nodes)
271+ write_seed_nodes "${seeds}"
272+ # If under relation context, and I'm leader => propagate seed-nodes to other units.
273+ if [ -n "${seeds}" -a -n "${JUJU_RELATION_ID}" ] && am_i_the_leader; then
274+ juju-log "Setting relation seed-nodes to ${seeds}"
275+ relation-set seed-nodes="${seeds}"
276+ fi
277+}
278+
279+nodestatus_is() {
280+ local ip="${1:?}" wanted_status="${2:?}"
281+ local nodestatus="$(nodestatus ${ip})"
282+ juju-log "nodestatus='${nodestatus}' wanted_status='${wanted_status}'"
283+ [[ ${nodestatus} =~ ${wanted_status} ]]
284+ return $?
285 }
286
287 nodestatus() {
288@@ -410,31 +517,43 @@
289 paste -d " " -s
290 }
291
292+echo_yml_expression() {
293+ python -c "import yaml,sys;print yaml.load(sys.stdin)$1" < ${CASSANDRA_YML}
294+}
295+
296+current_file_seeds() {
297+ echo_yml_expression '["seed_provider"][0]["parameters"][0]["seeds"]'
298+}
299+
300 bootstrap () {
301- service cassandra status && return 0
302- seeds=$(get_seed_nodes)
303- if [ -n "${seeds}" ] || am_i_the_leader; then
304- start_cassandra
305+ local IP
306+ local count status
307+ if am_i_the_leader; then
308+ :
309+ else
310+ local file_seeds=$(current_file_seeds)
311+ case "${file_seeds}" in
312+ ""|127.*)
313+ juju-log "Returning soft-fail on not-yet received external seeds"
314+ return 0;;
315+ esac
316+ fi
317 juju-log "Waiting for this node to join the cluster"
318- IP=`get_private_ip`
319- count=0
320- status="$(nodestatus $IP)"
321- while [ "${status}" != "UpNormal" ] && [ $count -lt 40 ]; do
322- sleep 30
323- status="$(nodestatus $IP)"
324- count=$((count + 1))
325- juju-log "Waited $((count * 30)) seconds so far...(nodestatus=${status})"
326+ # wait until cassandra is UP ok, 3 retries
327+ # start_cassandra will no-op if already running
328+ count=3
329+ while let count=count-1; do
330+ start_cassandra
331+ if wait_until 60 2 nodestatus_is $(get_private_ip) "Up(Joining|Normal)"; then
332+ return 0
333+ fi
334 done
335- if [ $count -eq 40 ]; then
336- juju-log "Timed out waiting for node"
337- exit 2
338- fi
339- set_seed_nodes
340- fi
341+ juju-log "Timed out waiting for Up Normal/Joining"
342+ exit 2
343 }
344
345 remove_down_nodes () {
346- set_seed_nodes
347+ reconfigure_cluster_seeds
348 }
349
350 decommission_node () {
351@@ -469,8 +588,11 @@
352 export SERVICE_DESCRIPTION="Cassandra server heap usage"
353 export NRPE_CMD_NAME="check_${NAGIOS_CONTEXT}_cassandra_heap"
354 export NRPE_CMD="/usr/lib/nagios/plugins/check_cassandra_heap.sh ${IP} ${WARN_PCT} ${CRIT_PCT}"
355+ if [ ! -d "${NAGIOS_EXPORT_DIR}" ]; then
356+ mkdir -p "${NAGIOS_EXPORT_DIR}"
357+ fi
358 cheetah fill --env -p templates/nrpe_cmd_file.tmpl > /etc/nagios/nrpe.d/${NRPE_CMD_NAME}.cfg
359- cheetah fill --env -p templates/nrpe_service_file.tmpl > /var/lib/nagios/export/service__${NAGIOS_HOSTNAME}_${NRPE_CMD_NAME}.cfg
360+ cheetah fill --env -p templates/nrpe_service_file.tmpl > ${NAGIOS_EXPORT_DIR}/service__${NAGIOS_HOSTNAME}_${NRPE_CMD_NAME}.cfg
361 fi
362 }
363
364@@ -524,6 +646,9 @@
365 # Construct the cassandra.yaml file from the appropriate information above
366 configure_cassandra
367 bzr_ci && needs_restart=:
368+ # be sure to kick a cassandra restart if not
369+ # running yet
370+ cassandra_is_running || needs_restart=:
371 if [ $needs_restart ]; then
372 # Restart as required
373 restart_cassandra
374@@ -548,13 +673,18 @@
375 ;;
376 cluster-relation-joined)
377 juju-log "Joining cassandra cluster..."
378+ am_i_the_leader || sleep $(( 30 + RANDOM%15 ))
379 ;;
380 cluster-relation-changed)
381 reconfigure_cluster_seeds
382+ # if there were config changes, restart cassandra
383+ bzr_ci && restart_cassandra
384 bootstrap
385 ;;
386 cluster-relation-departed)
387 reconfigure_cluster_seeds
388+ # if there were config changes, restart cassandra
389+ bzr_ci && restart_cassandra
390 remove_down_nodes
391 ;;
392 jmx-relation-joined)
393
394=== modified file 'tests/test-cassandra-conf.sh'
395--- tests/test-cassandra-conf.sh 2013-11-21 01:00:58 +0000
396+++ tests/test-cassandra-conf.sh 2014-05-19 20:14:21 +0000
397@@ -122,10 +122,10 @@
398 test_set_config force-seed-nodes "${value}"
399 hook_main config-changed 2>&1 || exit 1
400 exp_ok test $(echo_yml_expression '["seed_provider"][0]["parameters"][0]["seeds"]') = "${value}"
401- # and un-setting
402+ # and un-setting while bring back MY_IP
403 test_set_config force-seed-nodes ""
404 hook_main config-changed 2>&1 || exit 1
405- exp_ok test x$(echo_yml_expression '["seed_provider"][0]["parameters"][0]["seeds"]') = x
406+ exp_ok test $(echo_yml_expression '["seed_provider"][0]["parameters"][0]["seeds"]') = "$MY_IP"
407 }
408 test_units_to_update() {
409 # Change something
410@@ -198,14 +198,20 @@
411 mkdir -p ${ETC_CASSANDRA}
412 # Create fake environment (juju and other cmds)
413 declare -A CONFIG
414-JUJU_UNIT_NAME="cassandra-test/0"
415+MY_IP="127.0.0.99"
416+export JUJU_UNIT_NAME="cassandra-test/0"
417+export JUJU_RELATION_ID=foo:1
418 open-port() { echo "DRY: open-port $@"; }
419+service() { echo "DRY: service $@"; }
420 config-get() { eval echo "\${CONFIG[$1]}"; }
421+running_nodes(){ echo "$MY_IP"; }
422 unit-get() { echo "$JUJU_UNIT_NAME"; }
423-juju-log() { echo "$@" ;}
424+juju-log() { echo "juju-log: $@" >&2 ;}
425 relation-get() { echo "" ;}
426+relation-list(){ echo "" ;}
427+relation-set() { : ;}
428 bzr() { echo "DRY: bzr $@"; }
429-dig() { [[ ${FUNCNAME[1]} == get_private_ip ]] && echo "127.0.0.99" ;}
430+dig() { [[ ${FUNCNAME[1]} == get_private_ip ]] && echo "$MY_IP" ;}
431 source_charm_code
432 CASSANDRA_USER=$(id -nu)
433 CASSANDRA_GROUP=$(id -ng)

Subscribers

People subscribed via source and target branches