On Tue, 2009-11-24 at 23:20 +0000, Julian Edwards wrote: > Review: Needs Fixing code > Thanks for making this change William. I've commented inline. Thanks for the review, Julian. I've fixed the main issues -- an intermediate diff is attached. > > === modified file 'lib/lp/archiveuploader/dscfile.py' > > --- lib/lp/archiveuploader/dscfile.py 2009-11-16 22:06:14 +0000 > > +++ lib/lp/archiveuploader/dscfile.py 2009-11-18 03:10:29 +0000 > > @@ -32,7 +32,8 @@ > > from lp.archiveuploader.utils import ( > > prefix_multi_line_string, safe_fix_maintainer, ParseMaintError, > > re_valid_pkg_name, re_valid_version, re_issource, > > - determine_source_file_type) > > + re_is_component_orig_tar_ext, determine_source_file_type, > > + get_source_file_extension) > > Can you put the imports in alphabetical order please. Done. > > from canonical.encoding import guess as guess_encoding > > from lp.registry.interfaces.person import IPersonSet, PersonCreationRationale > > from lp.registry.interfaces.sourcepackage import SourcePackageFileType > > @@ -347,8 +348,9 @@ > > distribution=self.policy.distro, > > purpose=ArchivePurpose.PARTNER)] > > elif (self.policy.archive.purpose == ArchivePurpose.PPA and > > - determine_source_file_type(filename) == > > - SourcePackageFileType.ORIG_TARBALL): > > + determine_source_file_type(filename) in ( > > + SourcePackageFileType.ORIG_TARBALL, > > + SourcePackageFileType.COMPONENT_ORIG_TARBALL)): > > archives = [self.policy.archive, self.policy.distro.main_archive] > > else: > > archives = [self.policy.archive] > > @@ -374,9 +376,13 @@ > > """ > > > > diff_count = 0 > > + debian_tar_count = 0 > > orig_tar_count = 0 > > + component_orig_tar_counts = {} > > native_tar_count = 0 > > > > + bzip2_count = 0 > > + > > files_missing = False > > for sub_dsc_file in self.files: > > filetype = determine_source_file_type(sub_dsc_file.filename) > > @@ -385,12 +391,23 @@ > > diff_count += 1 > > elif filetype == SourcePackageFileType.ORIG_TARBALL: > > orig_tar_count += 1 > > + elif filetype == SourcePackageFileType.COMPONENT_ORIG_TARBALL: > > + component = re_is_component_orig_tar_ext.match( > > + get_source_file_extension(sub_dsc_file.filename)).group(1) > > + if component not in component_orig_tar_counts: > > + component_orig_tar_counts[component] = 0 > > + component_orig_tar_counts[component] += 1 > > + elif filetype == SourcePackageFileType.DEBIAN_TARBALL: > > + debian_tar_count += 1 > > elif filetype == SourcePackageFileType.NATIVE_TARBALL: > > native_tar_count += 1 > > This big list of "elif" statements is looking a bit unwieldy, especially with all the new conditions. Unless you can suggest anything better, how about we do something like: > [snip] While your suggested solution would not work (at least not without the 'nonlocal' keyword), I believe I have found something better. I keep one dict mapping SPFTs to counts, and another mapping component names to counts. This means there's only one special case (for COMPONENT_ORIG_TARBALL), and the code for each format becomes much cleaner too. > > else: > > yield UploadError('Unknown file: ' + sub_dsc_file.filename) > > continue > > > > + if sub_dsc_file.filename.endswith('.bz2'): > > + bzip2_count += 1 > > + > > try: > > library_file, file_archive = self._getFileByName( > > sub_dsc_file.filename) > > @@ -440,6 +457,10 @@ > > yield UploadError( > > "%s: has more than one orig.tar.*." > > % self.filename) > > + if debian_tar_count > 1: > > + yield UploadError( > > + "%s: has more than one debian.tar.*." > > + % self.filename) > > if native_tar_count > 1: > > yield UploadError( > > "%s: has more than one tar.*." > > @@ -455,15 +476,73 @@ > > "%s: must have exactly one tar.* or orig.tar.*." > > % self.filename) > > > > + if native_tar_count > 0 and debian_tar_count > 0: > > + yield UploadError( > > + "%s: must have no more than one tar.* or debian.tar.*." > > + % self.filename) > > + > > # Format 1.0 must be native (exactly one tar.gz), or > > # have an orig.tar.gz and a diff.gz. It cannot have > > # compression types other than 'gz'. > > if self.format == '1.0': > > + if bzip2_count > 0: > > + yield UploadError( > > + "%s: is format 1.0 but uses bzip2 compression." > > + % self.filename) > > + > > if ((diff_count == 0 and native_tar_count == 0) or > > (diff_count > 0 and native_tar_count > 0)): > > yield UploadError( > > "%s: must have exactly one diff.gz or tar.gz." > > % self.filename) > > + > > + if debian_tar_count > 0: > > + yield UploadError( > > + "%s: is format 1.0 but has debian.tar.*." > > + % self.filename) > > + > > + if len(component_orig_tar_counts) > 0: > > + yield UploadError( > > + "%s: is format 1.0 but has orig-COMPONENT.tar.*." > > + % self.filename) > > + # Format 3.0 (native) must have exactly one tar.*. > > + # gz and bz2 are valid compression types. > > + elif self.format == '3.0 (native)': > > + if native_tar_count == 0: > > + yield UploadError( > > + "%s: must have exactly one tar.*." > > + % self.filename) > > + > > + if diff_count > 0: > > + yield UploadError( > > + "%s: is format 3.0 but has diff.gz." > > + % self.filename) > > + > > + if len(component_orig_tar_counts) > 0: > > + yield UploadError( > > + "%s: is native but has orig-COMPONENT.tar.*." > > + % self.filename) > > + elif self.format == '3.0 (quilt)': > > + if orig_tar_count == 0: > > + yield UploadError( > > + "%s: must have exactly one orig.tar.*." > > + % self.filename) > > + > > + if debian_tar_count == 0: > > + yield UploadError( > > + "%s: must have exactly one debian.tar.*." > > + % self.filename) > > + > > + if diff_count > 0: > > + yield UploadError( > > + "%s: is format 3.0 but has diff.gz." > > + % self.filename) > > + > > + for component in component_orig_tar_counts: > > + if component_orig_tar_counts[component] > 1: > > + yield UploadError( > > + "%s: has more than one orig-%s.tar.*." > > + % (self.filename, component)) > > Can you split this massive method up a bit please. Ideally into three separate > methods that check the different formats. I've split it, and completely changed the verification style. The failure messages are much less verbose, but one would have to manually edit the .dsc to get any of them anyway. I feel the reduction in code makes things much better. > That will also make it easier for you to unit test this change without creating > new test packages so that we can be sure all the new file type rejection cases > are handled. This does make testing much easier. However, there are now just a few code paths, but a lot of potential failure cases. How completely should I test? > Also, can you please change it to compare against the format enum instead > of strings. Then the assertion error below will make more sense. > > > else: > > raise AssertionError("Unknown source format.") It will now die with a KeyError when looking up the file checker, which seems as reasonable as the old explicit AssertionError. > > > > > > === modified file 'lib/lp/archiveuploader/nascentuploadfile.py' > > --- lib/lp/archiveuploader/nascentuploadfile.py 2009-11-10 13:09:26 +0000 > > +++ lib/lp/archiveuploader/nascentuploadfile.py 2009-11-18 03:10:30 +0000 > > @@ -352,8 +352,9 @@ > > "Architecture field." % (self.filename)) > > > > version_chopped = re_no_epoch.sub('', self.version) > > - if determine_source_file_type(self.filename) == ( > > - SourcePackageFileType.ORIG_TARBALL): > > + if determine_source_file_type(self.filename) in ( > > + SourcePackageFileType.ORIG_TARBALL, > > + SourcePackageFileType.COMPONENT_ORIG_TARBALL): > > I've seen this check a couple of times now, I think it would be a good idea to > refactor it into a property. (is_orig or something like that) The two uses are on different types: this is a SourceUploadFile, while the other is a SourcePackageReleaseFile. Should I instead create a utility function alongside SPFT, perhaps? > [snip]