Merge ubuntu-debuginfod:prepare-poller-for-ppa-support into ubuntu-debuginfod:master

Proposed by Sergio Durigan Junior
Status: Merged
Merged at revision: d561d51dee41203d271c7b4cb106de282c858024
Proposed branch: ubuntu-debuginfod:prepare-poller-for-ppa-support
Merge into: ubuntu-debuginfod:master
Diff against target: 335 lines (+141/-54)
2 files modified
ddebpoller.py (+72/-31)
debugpoller.py (+69/-23)
Reviewer Review Type Date Requested Status
Bryce Harrington (community) Approve
Lena Voytek Pending
Athos Ribeiro Pending
Canonical Server Reporter Pending
Review via email: mp+438485@code.launchpad.net

Commit message

This MP implements improvements/adjustments that are needed in the "poller" classes for the upcoming PPA support.

There's not much else to say; I'm following the same approach as in https://code.launchpad.net/~sergiodj/ubuntu-debuginfod/+git/ubuntu-debuginfod/+merge/438422.

To post a comment you must log in.
Revision history for this message
Bryce Harrington (bryce) wrote :

Some random thoughts below, hope it helps!

review: Approve
Revision history for this message
Sergio Durigan Junior (sergiodj) wrote :

Thanks for the review, Bryce. I went through your comments and made the corresponding changes to the branch.

Revision history for this message
Sergio Durigan Junior (sergiodj) wrote :

For Lena and Athos: I will go ahead and merge the MP, but please feel free to review it and leave comments as you see fit; I can always make subsequent commits to address whatever is needed.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/ddebpoller.py b/ddebpoller.py
2index e4f586b..25b24ec 100644
3--- a/ddebpoller.py
4+++ b/ddebpoller.py
5@@ -27,52 +27,62 @@ class DdebPoller(DebugPoller):
6 """Perform the Launchpad polling and obtain a list of ddebs and source
7 packages."""
8
9- # The delta we subtract from a timestamp in order to make sure we
10- # don't miss publications that may arrive out of order.
11- TIMESTAMP_DELTA = datetime.timedelta(hours=1)
12-
13- def __init__(self, initial_interval=1, force_initial_interval=False, dry_run=False):
14+ def __init__(
15+ self,
16+ module_name="ddeb",
17+ initial_interval=1,
18+ force_initial_interval=False,
19+ fetch_all_on_first_run=False,
20+ dry_run=False,
21+ anonymous=True,
22+ ):
23 """Initialize the object using 'ddeb' as its name.
24
25 Look at DebugPoller's docstring for an explanation about the arguments."""
26 super().__init__(
27- "ddeb",
28+ module_name=module_name,
29 initial_interval=initial_interval,
30 force_initial_interval=force_initial_interval,
31+ fetch_all_on_first_run=fetch_all_on_first_run,
32 dry_run=dry_run,
33+ anonymous=anonymous,
34 )
35
36- def _get_normal_and_real_timestamps(self):
37- """Get the previous timestamp and adjust it to account for
38- publications arriving out of order."""
39- timestamp = self._get_timestamp()
40- if timestamp is None:
41- raise RuntimeError("_get_timestamp returned None")
42- if not isinstance(timestamp, datetime.datetime):
43- raise RuntimeError(
44- f"_get_timestamp returned an invalid timestamp ({timestamp})"
45- )
46- # Allow a grace period to cope with publications arriving out of
47- # order during long transactions.
48- real_timestamp = timestamp - self.TIMESTAMP_DELTA
49- return timestamp, real_timestamp
50-
51- def get_ddebs(self):
52+ def get_ddebs(self, archive=None, ppainfo=None):
53 """Get the list of ddebs that have been published since the last
54 timestamp from Launchpad.
55
56+ :param archive Launchpad archive: The Launchpad archive that
57+ should be queried for new binaries. Defaults to
58+ self._main_archive.
59+
60+ :param ppainfo dict(str, str): A dictionary containing three
61+ keys: "ppauser", "ppaname" and "isprivateppa". It should
62+ only be provided when dealing with a PPA; otherwise, the
63+ default is None.
64+
65 :rtype: dict, datetime.datetime
66
67 Return a dictionary containing all ddebs found, and also the
68 new timestamp that should then be recorded by calling
69- record_timestamp."""
70+ record_timestamp.
71+
72+ """
73 timestamp, real_timestamp = self._get_normal_and_real_timestamps()
74
75- self._logger.info(f"Polling ddebs created since '{real_timestamp}'")
76+ if archive is None:
77+ archive = self._main_archive
78+
79+ if ppainfo is None:
80+ archive_label = f"'{archive.displayname}' archive"
81+ else:
82+ archive_label = f"ppa:{ppainfo['ppauser']}/{ppainfo['ppaname']} (private: {ppainfo['isprivateppa']})"
83+
84+ self._logger.info(f"Polling ddebs created since '{real_timestamp}' from {archive_label}")
85
86 result = []
87 latest_timestamp_created = timestamp
88- for pkg in self._main_archive.getPublishedBinaries(
89+ for pkg in archive.getPublishedBinaries(
90 order_by_date=True, created_since_date=real_timestamp
91 ):
92 if pkg.status not in ("Pending", "Published"):
93@@ -85,7 +95,10 @@ class DdebPoller(DebugPoller):
94 # Safety check.
95 continue
96
97- if pkg.date_created > latest_timestamp_created:
98+ if (
99+ latest_timestamp_created is None
100+ or pkg.date_created > latest_timestamp_created
101+ ):
102 latest_timestamp_created = pkg.date_created
103
104 srcname = pkg.source_package_name
105@@ -112,26 +125,48 @@ class DdebPoller(DebugPoller):
106 "ddeb_filename": ddeb,
107 "architecture": arch,
108 }
109+ if ppainfo is not None:
110+ msg.update(ppainfo)
111+
112 result.append(msg)
113
114 return result, latest_timestamp_created
115
116- def get_sources(self):
117+ def get_sources(self, archive=None, ppainfo=None):
118 """Get the list of source packages that have been published since the
119 last timestamp from Launchpad.
120
121+ :param archive Launchpad archive: The Launchpad archive that
122+ should be queried for new sources. Defaults to
123+ self._main_archive.
124+
125+ :param ppainfo dict(str, str): A dictionary containing three
126+ keys: "ppauser", "ppaname" and "isprivateppa". It should
127+ only be provided when dealing with a PPA; otherwise, the
128+ default is None.
129+
130 :rtype: dict, datetime.datetime
131
132 Return a dictionary containing all source packages found, and
133 also the new timestamp that should then be recorded by calling
134- record_timestamp."""
135+ record_timestamp.
136+
137+ """
138 timestamp, real_timestamp = self._get_normal_and_real_timestamps()
139
140- self._logger.info(f"Polling source packages created since '{real_timestamp}'")
141+ if archive is None:
142+ archive = self._main_archive
143+
144+ if ppainfo is None:
145+ archive_label = f"'{archive.displayname}' archive"
146+ else:
147+ archive_label = f"ppa:{ppainfo['ppauser']}/{ppainfo['ppaname']} (private: {ppainfo['isprivateppa']})"
148+
149+ self._logger.info(f"Polling source packages created since '{real_timestamp}' from {archive_label}")
150
151 result = []
152 latest_timestamp_created = timestamp
153- for pkg in self._main_archive.getPublishedSources(
154+ for pkg in archive.getPublishedSources(
155 order_by_date=True, created_since_date=real_timestamp
156 ):
157 if pkg.status not in ("Pending", "Published"):
158@@ -142,7 +177,10 @@ class DdebPoller(DebugPoller):
159 # Safety check.
160 continue
161
162- if pkg.date_created > latest_timestamp_created:
163+ if (
164+ latest_timestamp_created is None
165+ or pkg.date_created > latest_timestamp_created
166+ ):
167 latest_timestamp_created = pkg.date_created
168
169 srcname = pkg.source_package_name
170@@ -156,6 +194,9 @@ class DdebPoller(DebugPoller):
171 "source_urls": src_urls,
172 "architecture": "source",
173 }
174+ if ppainfo is not None:
175+ msg.update(ppainfo)
176+
177 result.append(msg)
178
179 return result, latest_timestamp_created
180diff --git a/debugpoller.py b/debugpoller.py
181index c29ea5d..4e4a657 100644
182--- a/debugpoller.py
183+++ b/debugpoller.py
184@@ -31,19 +31,20 @@ class DebugPoller:
185 # The timestamp file we will save our timestamp into.
186 TIMESTAMP_FILE = os.path.expanduser("~/.config/ubuntu-debuginfod/timestamp")
187
188+ # The delta we subtract from a timestamp in order to make sure we
189+ # don't miss publications that may arrive out of order.
190+ TIMESTAMP_DELTA = datetime.timedelta(hours=1)
191+
192 def __init__(
193 self,
194- module_name,
195 initial_interval=1,
196 force_initial_interval=False,
197+ fetch_all_on_first_run=False,
198 dry_run=False,
199+ anonymous=True,
200 ):
201 """Initalize the DebugPoller object.
202
203- :param str module_name: Name of the DebugPoller module. This
204- is used to compose the timestamp filename so that it is
205- unique.
206-
207 :param int initial_interval: The initial interval (in hours),
208 i.e., if no timestamp file has been found then we use this
209 value to tell Launchpad that we want files created since
210@@ -53,14 +54,31 @@ class DebugPoller:
211 the initial interval to calculate the timestamp, instead
212 of relying on the timestamp file. Default is False.
213
214+ :param bool fetch_all_on_first_run: Instruct the poller to
215+ fetch everything from the archive when running for the
216+ first time. Useful to (re)build the entire pool of
217+ artifacts. In order to determine whether we're running
218+ for the first time, we check if the timestamp file exists.
219+ Default is False.
220+
221 :param bool dry_run: Tell the poller that it shouldn't record
222 the timestamp in the file when the operation finishes.
223- Default is False."""
224- self._lp = Launchpad.login_anonymously(
225- "ubuntu-debuginfod poller", "production", version="devel"
226- )
227+ Default is False.
228+
229+ :param bool anonymous: Whether we should login anonymously.
230+ Default is True.
231+
232+ """
233+ if anonymous:
234+ self._lp = Launchpad.login_anonymously(
235+ "ubuntu-debuginfod poller", "production", version="devel"
236+ )
237+ else:
238+ self._lp = Launchpad.login_with(
239+ "ubuntu-debuginfod poller", "production", version="devel"
240+ )
241+
242 self._main_archive = self._lp.distributions["ubuntu"].main_archive
243- self.TIMESTAMP_FILE = self.TIMESTAMP_FILE + f"-{module_name}"
244
245 self._logger = logging.getLogger(__name__)
246 logging.basicConfig(
247@@ -73,11 +91,20 @@ class DebugPoller:
248
249 self._initial_interval = initial_interval
250 self._force_initial_interval = force_initial_interval
251+ self._fetch_all_on_first_run = fetch_all_on_first_run
252 self._dry_run = dry_run
253
254+ @property
255+ def timestamp_filename(self):
256+ """Get the filename that contains the timestamp for this module.
257+
258+ :rtype str: The timestamp filename."""
259+ module_name = type(self).__name__.replace('Poller', '').lower()
260+ return f"{self.TIMESTAMP_FILE}-{module_name}"
261+
262 def _generate_timestamp(self, interval=None):
263 """Generate a timestamp that can be used when querying Launchpad
264- (via getPublishedSources).
265+ (via getPublished{Sources,Binaries}).
266
267 :param interval: Specify how long ago (in hours) the timestamp must
268 refer to. If not specified, the timestamp is generated for
269@@ -97,19 +124,43 @@ class DebugPoller:
270 new one.
271
272 If a timestamp file exists, returns its value. Otherwise,
273- generate a new one with self._initial_interval."""
274- if not os.path.exists(self.TIMESTAMP_FILE) or self._force_initial_interval:
275- self._logger.debug(f"Timestamp file '{self.TIMESTAMP_FILE}' doesn't exist")
276+ generate a new one with self._initial_interval.
277+
278+ :raises RuntimeError: Invalid timestamp generated."""
279+ tfile = self.timestamp_filename
280+
281+ if self._fetch_all_on_first_run and not os.path.exists(tfile):
282+ self._logger.debug("Fetching everything on first run")
283+ return None
284+
285+ if self._force_initial_interval or not os.path.exists(tfile):
286+ self._logger.debug(f"Timestamp file '{tfile}' doesn't exist")
287 self._logger.debug(
288 f"Or force_initial_interval = {self._force_initial_interval}"
289 )
290 return self._generate_timestamp(interval=self._initial_interval)
291
292- with open(self.TIMESTAMP_FILE, "r", encoding="UTF-8") as f:
293+ with open(tfile, "r", encoding="UTF-8") as f:
294 ts = int(f.readline().rstrip())
295 d = datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc)
296+
297+ if not isinstance(d, datetime.datetime):
298+ raise RuntimeError(f"Invalid timestamp {d}")
299 return d
300
301+ def _get_normal_and_real_timestamps(self):
302+ """Get the previous timestamp and adjust it to account for
303+ publications arriving out of order."""
304+ tfile = self.timestamp_filename
305+ if not os.path.exists(tfile) and self._fetch_all_on_first_run:
306+ return None, None
307+
308+ timestamp = self._get_timestamp()
309+ # Allow a grace period to cope with publications arriving out of
310+ # order during long transactions.
311+ real_timestamp = timestamp - self.TIMESTAMP_DELTA
312+ return timestamp, real_timestamp
313+
314 def record_timestamp(self, timestamp):
315 """Save the timestamp into the timestamp file.
316
317@@ -119,15 +170,10 @@ class DebugPoller:
318 self._logger.debug("dry_run enabled, not recording timestamp file")
319 return
320
321- dirname = os.path.dirname(self.TIMESTAMP_FILE)
322+ tfile = self.timestamp_filename
323+ dirname = os.path.dirname(tfile)
324 os.makedirs(dirname, mode=0o755, exist_ok=True)
325- with open(self.TIMESTAMP_FILE, "w", encoding="UTF-8") as f:
326+ with open(tfile, "w", encoding="UTF-8") as f:
327 epoch = datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc)
328 ts = int((timestamp - epoch).total_seconds())
329 f.write("%d" % ts)
330-
331- def set_initial_interval(self, initial_interval):
332- """Set the initial_interval value.
333-
334- :param int initial_interval: The new initial_interval to be used."""
335- self._initial_interval = initial_interval

Subscribers

People subscribed via source and target branches

to all changes: