Thanks for making this change William. I've commented inline. > === modified file 'lib/lp/archiveuploader/dscfile.py' > --- lib/lp/archiveuploader/dscfile.py 2009-11-16 22:06:14 +0000 > +++ lib/lp/archiveuploader/dscfile.py 2009-11-18 03:10:29 +0000 > @@ -32,7 +32,8 @@ > from lp.archiveuploader.utils import ( > prefix_multi_line_string, safe_fix_maintainer, ParseMaintError, > re_valid_pkg_name, re_valid_version, re_issource, > - determine_source_file_type) > + re_is_component_orig_tar_ext, determine_source_file_type, > + get_source_file_extension) Can you put the imports in alphabetical order please. > from canonical.encoding import guess as guess_encoding > from lp.registry.interfaces.person import IPersonSet, PersonCreationRationale > from lp.registry.interfaces.sourcepackage import SourcePackageFileType > @@ -347,8 +348,9 @@ > distribution=self.policy.distro, > purpose=ArchivePurpose.PARTNER)] > elif (self.policy.archive.purpose == ArchivePurpose.PPA and > - determine_source_file_type(filename) == > - SourcePackageFileType.ORIG_TARBALL): > + determine_source_file_type(filename) in ( > + SourcePackageFileType.ORIG_TARBALL, > + SourcePackageFileType.COMPONENT_ORIG_TARBALL)): > archives = [self.policy.archive, self.policy.distro.main_archive] > else: > archives = [self.policy.archive] > @@ -374,9 +376,13 @@ > """ > > diff_count = 0 > + debian_tar_count = 0 > orig_tar_count = 0 > + component_orig_tar_counts = {} > native_tar_count = 0 > > + bzip2_count = 0 > + > files_missing = False > for sub_dsc_file in self.files: > filetype = determine_source_file_type(sub_dsc_file.filename) > @@ -385,12 +391,23 @@ > diff_count += 1 > elif filetype == SourcePackageFileType.ORIG_TARBALL: > orig_tar_count += 1 > + elif filetype == SourcePackageFileType.COMPONENT_ORIG_TARBALL: > + component = re_is_component_orig_tar_ext.match( > + get_source_file_extension(sub_dsc_file.filename)).group(1) > + if component not in component_orig_tar_counts: > + component_orig_tar_counts[component] = 0 > + component_orig_tar_counts[component] += 1 > + elif filetype == SourcePackageFileType.DEBIAN_TARBALL: > + debian_tar_count += 1 > elif filetype == SourcePackageFileType.NATIVE_TARBALL: > native_tar_count += 1 This big list of "elif" statements is looking a bit unwieldy, especially with all the new conditions. Unless you can suggest anything better, how about we do something like: def orig_tarball_handler: orig_tar_count += 1 def .... filetype_to_action_map = { SourcePackageFileType.ORIG_TARBALL : orig_tarball_handler, ... } # Or you could use lambda funcs there --^ it's probably tidier. file_type = determine_source_file_type(sub_dsc_file.filename) try: filetype_to_action_map[file_type]() except KeyError: yield UploadError ... > else: > yield UploadError('Unknown file: ' + sub_dsc_file.filename) > continue > > + if sub_dsc_file.filename.endswith('.bz2'): > + bzip2_count += 1 > + > try: > library_file, file_archive = self._getFileByName( > sub_dsc_file.filename) > @@ -440,6 +457,10 @@ > yield UploadError( > "%s: has more than one orig.tar.*." > % self.filename) > + if debian_tar_count > 1: > + yield UploadError( > + "%s: has more than one debian.tar.*." > + % self.filename) > if native_tar_count > 1: > yield UploadError( > "%s: has more than one tar.*." > @@ -455,15 +476,73 @@ > "%s: must have exactly one tar.* or orig.tar.*." > % self.filename) > > + if native_tar_count > 0 and debian_tar_count > 0: > + yield UploadError( > + "%s: must have no more than one tar.* or debian.tar.*." > + % self.filename) > + > # Format 1.0 must be native (exactly one tar.gz), or > # have an orig.tar.gz and a diff.gz. It cannot have > # compression types other than 'gz'. > if self.format == '1.0': > + if bzip2_count > 0: > + yield UploadError( > + "%s: is format 1.0 but uses bzip2 compression." > + % self.filename) > + > if ((diff_count == 0 and native_tar_count == 0) or > (diff_count > 0 and native_tar_count > 0)): > yield UploadError( > "%s: must have exactly one diff.gz or tar.gz." > % self.filename) > + > + if debian_tar_count > 0: > + yield UploadError( > + "%s: is format 1.0 but has debian.tar.*." > + % self.filename) > + > + if len(component_orig_tar_counts) > 0: > + yield UploadError( > + "%s: is format 1.0 but has orig-COMPONENT.tar.*." > + % self.filename) > + # Format 3.0 (native) must have exactly one tar.*. > + # gz and bz2 are valid compression types. > + elif self.format == '3.0 (native)': > + if native_tar_count == 0: > + yield UploadError( > + "%s: must have exactly one tar.*." > + % self.filename) > + > + if diff_count > 0: > + yield UploadError( > + "%s: is format 3.0 but has diff.gz." > + % self.filename) > + > + if len(component_orig_tar_counts) > 0: > + yield UploadError( > + "%s: is native but has orig-COMPONENT.tar.*." > + % self.filename) > + elif self.format == '3.0 (quilt)': > + if orig_tar_count == 0: > + yield UploadError( > + "%s: must have exactly one orig.tar.*." > + % self.filename) > + > + if debian_tar_count == 0: > + yield UploadError( > + "%s: must have exactly one debian.tar.*." > + % self.filename) > + > + if diff_count > 0: > + yield UploadError( > + "%s: is format 3.0 but has diff.gz." > + % self.filename) > + > + for component in component_orig_tar_counts: > + if component_orig_tar_counts[component] > 1: > + yield UploadError( > + "%s: has more than one orig-%s.tar.*." > + % (self.filename, component)) Can you split this massive method up a bit please. Ideally into three separate methods that check the different formats. That will also make it easier for you to unit test this change without creating new test packages so that we can be sure all the new file type rejection cases are handled. Also, can you please change it to compare against the format enum instead of strings. Then the assertion error below will make more sense. > else: > raise AssertionError("Unknown source format.") > > > === modified file 'lib/lp/archiveuploader/nascentuploadfile.py' > --- lib/lp/archiveuploader/nascentuploadfile.py 2009-11-10 13:09:26 +0000 > +++ lib/lp/archiveuploader/nascentuploadfile.py 2009-11-18 03:10:30 +0000 > @@ -352,8 +352,9 @@ > "Architecture field." % (self.filename)) > > version_chopped = re_no_epoch.sub('', self.version) > - if determine_source_file_type(self.filename) == ( > - SourcePackageFileType.ORIG_TARBALL): > + if determine_source_file_type(self.filename) in ( > + SourcePackageFileType.ORIG_TARBALL, > + SourcePackageFileType.COMPONENT_ORIG_TARBALL): I've seen this check a couple of times now, I think it would be a good idea to refactor it into a property. (is_orig or something like that) > version_chopped = re_no_revision.sub('', version_chopped) > > source_match = re_issource.match(self.filename) > > === added directory 'lib/lp/archiveuploader/tests/data/suite/bar_1.0-1_1.0-bzip2' > === added file 'lib/lp/archiveuploader/tests/data/suite/bar_1.0-1_1.0-bzip2/bar_1.0-1.diff.gz' > Binary files lib/lp/archiveuploader/tests/data/suite/bar_1.0-1_1.0-bzip2/bar_1.0-1.diff.gz 1970-01-01 00:00:00 +0000 and lib/lp/archiveuploader/tests/data/suite/bar_1.0-1_1.0-bzip2/bar_1.0-1.diff.gz 2009-11-18 03:10:29 +0000 differ > === added file 'lib/lp/archiveuploader/tests/data/suite/bar_1.0-1_1.0-bzip2/bar_1.0-1.dsc' > --- lib/lp/archiveuploader/tests/data/suite/bar_1.0-1_1.0-bzip2/bar_1.0-1.dsc 1970-01-01 00:00:00 +0000 > +++ lib/lp/archiveuploader/tests/data/suite/bar_1.0-1_1.0-bzip2/bar_1.0-1.dsc 2009-11-18 03:10:29 +0000 > @@ -0,0 +1,10 @@ > +Format: 1.0 > +Source: bar > +Version: 1.0-1 > +Binary: bar > +Maintainer: Launchpad team