Merge ubuntu-debuginfod:fix-poller-timestamp into ubuntu-debuginfod:master

Proposed by Sergio Durigan Junior
Status: Merged
Merged at revision: 825e53575702b5b126232abc36504c0db0b83448
Proposed branch: ubuntu-debuginfod:fix-poller-timestamp
Merge into: ubuntu-debuginfod:master
Diff against target: 218 lines (+50/-38)
3 files modified
ddebpoller.py (+34/-24)
debugpoller.py (+10/-9)
poll_launchpad.py (+6/-5)
Reviewer Review Type Date Requested Status
Lena Voytek (community) Approve
Bryce Harrington Pending
Athos Ribeiro Pending
Canonical Server Reporter Pending
Review via email: mp+435630@code.launchpad.net

Description of the change

Recently I noticed (kudos to schopin for his ping) that the poller has been missing several ddebs. I'm not entirely sure why that's happening, but I have a strong feeling that this has something to do with timestamps. ddeb-retriever does some manipulation in order to guarantee that it doesn't miss "publications arriving out of order". So I'm implementing the same thing here, in the hopes that it solves the problem.

I would like to deploy this in production and keep monitoring to check if the problem is solved.

To post a comment you must log in.
Revision history for this message
Lena Voytek (lvoytek) wrote :

The changes make sense and should work fine with the Python datetime library. I have some additional suggestions below but otherwise looks good to me

review: Approve
Revision history for this message
Sergio Durigan Junior (sergiodj) wrote :

Thanks, Lena. I've addressed your comments and will merge the code.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/ddebpoller.py b/ddebpoller.py
2index 09f3a1c..67a61b8 100644
3--- a/ddebpoller.py
4+++ b/ddebpoller.py
5@@ -19,6 +19,7 @@
6
7 import os
8 from urllib import parse
9+import datetime
10 from debugpoller import DebugPoller
11
12
13@@ -26,6 +27,10 @@ class DdebPoller(DebugPoller):
14 """Perform the Launchpad polling and obtain a list of ddebs and source
15 packages."""
16
17+ # The delta we subtract from a timestamp in order to make sure we
18+ # don't miss publications that may arrive out of order.
19+ TIMESTAMP_DELTA = datetime.timedelta(hours=1)
20+
21 def __init__(self, initial_interval=1, force_initial_interval=False, dry_run=False):
22 """Initialize the object using 'ddeb' as its name.
23
24@@ -37,38 +42,38 @@ class DdebPoller(DebugPoller):
25 dry_run=dry_run,
26 )
27
28- def _get_and_generate_timestamps(self):
29- """Get the previous timestamp and generate a new one."""
30+ def _get_normal_and_real_timestamps(self):
31+ """Get the previous timestamp and adjust it to account for
32+ publications arriving out of order."""
33 timestamp = self._get_timestamp()
34 if timestamp is None:
35 raise RuntimeError("_get_timestamp returned None")
36- if timestamp == "":
37- raise RuntimeError("_get_timestamp returned a blank timestamp")
38- new_timestamp = self._generate_timestamp()
39- if new_timestamp is None:
40- raise RuntimeError("_generate_timestamp returned None")
41- if new_timestamp == "":
42- raise RuntimeError("_generate_timestamp returned a blank timestamp")
43- return timestamp, new_timestamp
44+ if not isinstance(timestamp, datetime.datetime):
45+ raise RuntimeError(
46+ f"_get_timestamp returned an invalid timestamp ({timestamp})"
47+ )
48+ # Allow a grace period to cope with publications arriving out of
49+ # order during long transactions.
50+ real_timestamp = timestamp - self.TIMESTAMP_DELTA
51+ return timestamp, real_timestamp
52
53 def get_ddebs(self):
54 """Get the list of ddebs that have been published since the last
55 timestamp from Launchpad.
56
57- :rtype: dict, str
58+ :rtype: dict, datetime.datetime
59
60 Return a dictionary containing all ddebs found, and also the
61 new timestamp that should then be recorded by calling
62 record_timestamp."""
63- timestamp, new_timestamp = self._get_and_generate_timestamps()
64+ timestamp, real_timestamp = self._get_normal_and_real_timestamps()
65
66- self._logger.info(
67- f"Polling ddebs created since '{timestamp}' (it's now '{new_timestamp}')"
68- )
69+ self._logger.info(f"Polling ddebs created since '{real_timestamp}'")
70
71 result = []
72+ latest_timestamp_created = timestamp
73 for pkg in self._main_archive.getPublishedBinaries(
74- order_by_date=True, created_since_date=timestamp
75+ order_by_date=True, created_since_date=real_timestamp
76 ):
77 if pkg.status not in ("Pending", "Published"):
78 continue
79@@ -80,6 +85,9 @@ class DdebPoller(DebugPoller):
80 # Safety check.
81 continue
82
83+ if pkg.date_created > latest_timestamp_created:
84+ latest_timestamp_created = pkg.date_created
85+
86 srcname = pkg.source_package_name
87 srcver = pkg.source_package_version
88 component = pkg.component_name
89@@ -104,26 +112,25 @@ class DdebPoller(DebugPoller):
90 }
91 result.append(msg)
92
93- return result, new_timestamp
94+ return result, latest_timestamp_created
95
96 def get_sources(self):
97 """Get the list of source packages that have been published since the
98 last timestamp from Launchpad.
99
100- :rtype: dict, str
101+ :rtype: dict, datetime.datetime
102
103 Return a dictionary containing all source packages found, and
104 also the new timestamp that should then be recorded by calling
105 record_timestamp."""
106- timestamp, new_timestamp = self._get_and_generate_timestamps()
107+ timestamp, real_timestamp = self._get_normal_and_real_timestamps()
108
109- self._logger.info(
110- f"Polling source packages created since '{timestamp}' (it's now '{new_timestamp}')"
111- )
112+ self._logger.info(f"Polling source packages created since '{real_timestamp}'")
113
114 result = []
115+ latest_timestamp_created = timestamp
116 for pkg in self._main_archive.getPublishedSources(
117- order_by_date=True, created_since_date=timestamp
118+ order_by_date=True, created_since_date=real_timestamp
119 ):
120 if pkg.status not in ("Pending", "Published"):
121 continue
122@@ -133,6 +140,9 @@ class DdebPoller(DebugPoller):
123 # Safety check.
124 continue
125
126+ if pkg.date_created > latest_timestamp_created:
127+ latest_timestamp_created = pkg.date_created
128+
129 srcname = pkg.source_package_name
130 srcver = pkg.source_package_version
131 component = pkg.component_name
132@@ -146,4 +156,4 @@ class DdebPoller(DebugPoller):
133 }
134 result.append(msg)
135
136- return result, new_timestamp
137+ return result, latest_timestamp_created
138diff --git a/debugpoller.py b/debugpoller.py
139index 2d74723..c29ea5d 100644
140--- a/debugpoller.py
141+++ b/debugpoller.py
142@@ -31,9 +31,6 @@ class DebugPoller:
143 # The timestamp file we will save our timestamp into.
144 TIMESTAMP_FILE = os.path.expanduser("~/.config/ubuntu-debuginfod/timestamp")
145
146- # The timestamp format we use. Launchpad requires this format.
147- TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S"
148-
149 def __init__(
150 self,
151 module_name,
152@@ -92,8 +89,8 @@ class DebugPoller:
153 self._logger.debug(f"Generating timestamp with interval {interval}")
154 d = d - datetime.timedelta(hours=interval)
155
156- self._logger.debug(f"Generated timestamp '{d.strftime(self.TIMESTAMP_FORMAT)}'")
157- return d.strftime(self.TIMESTAMP_FORMAT)
158+ self._logger.debug(f"Generated timestamp '{d}'")
159+ return d
160
161 def _get_timestamp(self):
162 """Get the timestamp from the timestamp file, or generate a
163@@ -109,13 +106,15 @@ class DebugPoller:
164 return self._generate_timestamp(interval=self._initial_interval)
165
166 with open(self.TIMESTAMP_FILE, "r", encoding="UTF-8") as f:
167- return f.readline().rstrip()
168+ ts = int(f.readline().rstrip())
169+ d = datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc)
170+ return d
171
172 def record_timestamp(self, timestamp):
173 """Save the timestamp into the timestamp file.
174
175- :param str timestamp: Timestamp that should be saved into the
176- TIMESTAMP_FILE."""
177+ :param datetime.datetime timestamp: Timestamp that should be
178+ saved into the TIMESTAMP_FILE."""
179 if self._dry_run:
180 self._logger.debug("dry_run enabled, not recording timestamp file")
181 return
182@@ -123,7 +122,9 @@ class DebugPoller:
183 dirname = os.path.dirname(self.TIMESTAMP_FILE)
184 os.makedirs(dirname, mode=0o755, exist_ok=True)
185 with open(self.TIMESTAMP_FILE, "w", encoding="UTF-8") as f:
186- f.write(timestamp)
187+ epoch = datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc)
188+ ts = int((timestamp - epoch).total_seconds())
189+ f.write("%d" % ts)
190
191 def set_initial_interval(self, initial_interval):
192 """Set the initial_interval value.
193diff --git a/poll_launchpad.py b/poll_launchpad.py
194index baeab6f..77259b4 100644
195--- a/poll_launchpad.py
196+++ b/poll_launchpad.py
197@@ -97,17 +97,18 @@ def poll_launchpad(conn, cursor):
198 poller = DdebPoller()
199
200 # Process ddebs.
201- messages, new_timestamp = poller.get_ddebs()
202+ messages, new_timestamp_ddebs = poller.get_ddebs()
203 for msg in messages:
204 _process_msg(msg, conn, cursor)
205
206- # Process source packages. We ignore the new timestamp returned
207- # by the method because we'll use the one returned previously by
208- # get_ddebs.
209- messages, _ = poller.get_sources()
210+ # Process source packages.
211+ messages, new_timestamp_src = poller.get_sources()
212 for msg in messages:
213 _process_msg(msg, conn, cursor)
214
215+ # Choose the earliest timestamp.
216+ new_timestamp = min(new_timestamp_ddebs, new_timestamp_src)
217+
218 poller.record_timestamp(new_timestamp)
219
220

Subscribers

People subscribed via source and target branches

to all changes: