Merge lp:~thomir-deactivatedaccount/core-result-checker/trunk-check-swift-better into lp:core-result-checker

Proposed by Thomi Richards
Status: Merged
Approved by: Thomi Richards
Approved revision: 15
Merged at revision: 13
Proposed branch: lp:~thomir-deactivatedaccount/core-result-checker/trunk-check-swift-better
Merge into: lp:core-result-checker
Diff against target: 175 lines (+78/-18)
1 file modified
core_result_checker/__init__.py (+78/-18)
To merge this branch: bzr merge lp:~thomir-deactivatedaccount/core-result-checker/trunk-check-swift-better
Reviewer: Celso Providelo (community), Status: Approve
Review via email: mp+255320@code.launchpad.net

Commit message

Wait for swift to be consistent.

Description of the change

Add a check that swift has become consistent before we try to make the container public.
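
In outline, the branch stats the uploaded results file in the container and only proceeds to publish once swift reports it as present. A minimal sketch of that check, using python-swiftclient's SwiftService (the options dict and the surrounding names here are illustrative, not the project's exact code):

    from swiftclient.service import SwiftService

    def container_contains_file(swift_options, container_name, file_name):
        # stat() yields one result dict per requested object; 'success' stays
        # False until swift has actually made the uploaded file visible.
        swift = SwiftService(swift_options)
        result = next(swift.stat(container_name, [file_name]))
        return result['success']

    # In the worker, before publishing (pseudocode):
    #   if container_contains_file(nova_options, container_name, "results.tgz"):
    #       make_container_public(container_name)
    #   else:
    #       requeue the message and retry within a grace period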

Revision history for this message
Celso Providelo (cprov) wrote:

Right, let's see the SwiftManager in action.

I just think that having a message spin for 10 minutes while waiting for swift seems excessive (I can see the logging storm already), but we shall see ...

For situations like this I'd prefer moving jobs quickly (< 30 s) to the dead letter queue and having proper monitoring and replay scripts.

review: Approve
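
A dead letter replay script along the lines suggested in the review above could be fairly small with kombu's SimpleQueue. A rough sketch (the AMQP URI in the usage comment is made up; the queue names follow the pattern this project already uses):

    import kombu
    from queue import Empty

    def replay_deadletters(amqp_uri, api_version):
        # Drain the dead letter queue and push each payload back onto the
        # results queue so the checker processes it again.
        with kombu.Connection(amqp_uri) as connection:
            dead = connection.SimpleQueue(
                "core.deadletters.{}".format(api_version))
            live = connection.SimpleQueue(
                "core.result.{}".format(api_version))
            try:
                while True:
                    try:
                        message = dead.get(block=False)
                    except Empty:
                        break
                    live.put(message.payload)  # re-publish the original payload
                    message.ack()  # drop it from the dead letter queue
            finally:
                dead.close()
                live.close()

    # Example: replay_deadletters("amqp://user:pass@broker//", constants.API_VERSION)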
15. By Thomi Richards

Set default timeout to 60 seconds, not 600.

Preview Diff

=== modified file 'core_result_checker/__init__.py'
--- core_result_checker/__init__.py 2015-04-07 00:06:54 +0000
+++ core_result_checker/__init__.py 2015-04-07 04:03:38 +0000
@@ -17,6 +17,7 @@
 
 import argparse
 import configparser
+from datetime import datetime
 import logging
 import os
 
@@ -37,9 +38,9 @@
 
 class Worker(object):
 
-    def __init__(self, swift_publisher, retry_publisher):
+    def __init__(self, swift_manager, retry_publisher):
         get_logger(__name__).info("Service Started.")
-        self.swift_publisher = swift_publisher
+        self.swift_manager = swift_manager
         self.retry_publisher = retry_publisher
 
     def __call__(self, message):
@@ -54,6 +55,7 @@
             logger.error("Unable to unpack incoming message: %s", str(e))
             return MessageActions.Retry
 
+        # check to see if adt-run reported infrastructure failures:
         if int(exit_code) in (16, 20, 100):
             logger.info(
                 "Test run infrastructure failed (exit code %s), retrying.",
@@ -66,8 +68,18 @@
             device,
             image_name,
         )
+
+        # check to see if swift is consistent yet:
+        if not self.swift_manager.container_contains_file(
+            container_name,
+            "results.tgz"
+        ):
+            logger.warning("Swift container not yet consistent.")
+            self.retry_publisher.retry_for_swift(message)
+            return MessageActions.Acknowledge
+
         try:
-            self.swift_publisher.make_container_public(container_name)
+            self.swift_manager.make_container_public(container_name)
         except RuntimeError as e:
             logger.error(
                 "Unable to publish swift container '%s'. Error: %s.",
@@ -105,9 +117,9 @@
     return config
 
 
-class SwiftPublisher(object):
+class SwiftManager(object):
 
-    """A class that knows how to make a swift container public."""
+    """A class that knows how to interact with swift."""
 
     def __init__(self, nova_config):
         self.config = nova_config
@@ -131,6 +143,11 @@
             )
         )
 
+    def container_contains_file(self, container_name, file_name):
+        swift = SwiftService(self.config)
+        result = next(swift.stat(container_name, [file_name]))
+        return result['success']
+
 
 class RetryPublisher(object):
 
@@ -139,9 +156,10 @@
     def __init__(self, connection, max_retries):
         self.connection = connection
         self.max_retries = max_retries
+        self.swift_retry_seconds = 60
 
     def retry_test_run(self, payload):
-        """Maybe retry a test payload.
+        """Maybe retry a test payload. Due to adt-run reported failure.
 
         This will look at it's retry count, and either requeue it in the
         test queue, or insert it into the dead letter queue.
@@ -149,23 +167,65 @@
         """
         retry_count = int(payload.get('test_run_retry_count', '0'))
         if retry_count < self.max_retries:
-            q = self.connection.SimpleQueue(
-                "core.tests.{}".format(constants.API_VERSION)
-            )
             payload['test_run_retry_count'] = retry_count + 1
-            q.put(payload)
-            q.close()
+            self._insert_payload_into_queue(
+                "core.tests.{}".format(constants.API_VERSION),
+                payload
+            )
         else:
             logger = get_logger(__name__, payload)
             logger.error(
                 "Test retry count exceeds maximum. Inserting into dead "
                 "letter queue"
             )
-            q = self.connection.SimpleQueue(
-                "core.deadletters.{}".format(constants.API_VERSION)
-            )
-            q.put(payload)
-            q.close()
+            self._insert_payload_into_queue(
+                "core.deadletters.{}".format(constants.API_VERSION),
+                payload
+            )
+
+    def retry_for_swift(self, payload):
+        """Maybe retry a test payload while waiting for swift.
+
+        This will look at the first time the message was checked, and give a
+        certain grace period. After that period is over, we will put the
+        payload into the dead letter queue.
+
+        """
+        swift_check_key = 'swift_first_check_time'
+        first_swift_check_time = payload.get(swift_check_key)
+        if first_swift_check_time is None:
+            first_swift_check_time = datetime.utcnow()
+            payload[swift_check_key] = str(first_swift_check_time.timestamp())
+        else:
+            first_swift_check_time = datetime.fromtimestamp(
+                float(first_swift_check_time)
+            )
+        now = datetime.utcnow()
+        time_since_first_check = now - first_swift_check_time
+        logger = get_logger(__name__, payload)
+        if time_since_first_check.total_seconds() < self.swift_retry_seconds:
+            logger.info(
+                "Requeueing message since swift is not yet consistent"
+            )
+            self._insert_payload_into_queue(
+                "core.result.{}".format(constants.API_VERSION),
+                payload
+            )
+        else:
+            logger.error(
+                "swift is still not consistent after %d seconds. Inserting "
+                "message into dead letter queue.",
+                self.swift_retry_seconds
+            )
+            self._insert_payload_into_queue(
+                "core.deadletters.{}".format(constants.API_VERSION),
+                payload
+            )
+
+    def _insert_payload_into_queue(self, queue, payload):
+        q = self.connection.SimpleQueue(queue)
+        q.put(payload)
+        q.close()
 
 
 def main():
@@ -184,9 +244,9 @@
     )
     try:
         with kombu.Connection(amqp_uris) as connection:
-            swift_publisher = SwiftPublisher(config['nova'])
+            swift_manager = SwiftManager(config['nova'])
             retry_publisher = RetryPublisher(connection, 3)
-            worker = Worker(swift_publisher, retry_publisher)
+            worker = Worker(swift_manager, retry_publisher)
             queue_monitor = SimpleRabbitQueueWorker(
                 connection,
                 "core.result.{}".format(constants.API_VERSION),
