diff --git a/src/pubtools/_pulp/tasks/push/items/base.py b/src/pubtools/_pulp/tasks/push/items/base.py index ae48d2bb..0b55b2a0 100644 --- a/src/pubtools/_pulp/tasks/push/items/base.py +++ b/src/pubtools/_pulp/tasks/push/items/base.py @@ -645,3 +645,7 @@ def upload_key(self): content types which can safely reuse uploads. """ return None + + def fail_if_duplicate(self, pulp_client): # pylint:disable=unused-argument + """Subclasses may override this method to check for duplicates and fail if found.""" + return None diff --git a/src/pubtools/_pulp/tasks/push/items/rpm.py b/src/pubtools/_pulp/tasks/push/items/rpm.py index 9fa69294..17d440c7 100644 --- a/src/pubtools/_pulp/tasks/push/items/rpm.py +++ b/src/pubtools/_pulp/tasks/push/items/rpm.py @@ -1,4 +1,5 @@ import os +import logging from pushsource import RpmPushItem import attr @@ -6,6 +7,8 @@ from .base import supports_type, PulpPushItem, UploadContext +LOG = logging.getLogger("pubtools.pulp") + @attr.s(frozen=True, slots=True) class RpmUploadContext(UploadContext): @@ -36,8 +39,8 @@ def unit_type(self): return RpmUnit @property - def rpm_nvr(self): - # (n, v, r) tuple derived from filename, used in cdn_path calculation + def rpm_nvra(self): + # (n, v, r, a) tuple derived from filename, used in cdn_path calculation # Filename convention can be found at: # http://ftp.rpm.org/max-rpm/ch-rpm-file-format.html @@ -49,16 +52,18 @@ def rpm_nvr(self): # mpr.hcraon.1.1.3_7le.41-0.4.4-slootnimda-api filename_rev = "".join(reversed(filename)) - # 1.1.3_7le.41-0.4.4-slootnimda-api - nvr_rev = filename_rev.split(".", 2)[2] + # (hcraon, 1.1.3_7le.41-0.4.4-slootnimda-api) + arch_rev, nvr_rev = filename_rev.split(".", 2)[1:3] # ('1.1.3_7le.41', '0.4.4', 'slootnimda-api') components_revrev = nvr_rev.split("-", 2) - # ['14.el7_3.1.1', '4.4.0', 'ipa-admintools'] - components_rev = ["".join(reversed(c)) for c in components_revrev] + # ['noarch', '14.el7_3.1.1', '4.4.0', 'ipa-admintools'] + components_rev = [ + "".join(reversed(c)) for c in [arch_rev] + components_revrev + ] - # ('ipa-admintools', '4.4.0', '14.el7_3.1.1') + # ('ipa-admintools', '4.4.0', '14.el7_3.1.1', 'noarch') return tuple(reversed(components_rev)) except Exception as exc: # pylint: disable=broad-except # Crashes above may be a bit hard to understand, so we raise with @@ -71,7 +76,7 @@ def rpm_nvr(self): @property def cdn_path(self): """Desired value of RpmUnit.cdn_path field.""" - n, v, r = self.rpm_nvr + n, v, r, _ = self.rpm_nvra return os.path.join( "/content/origin/rpms", n, @@ -144,3 +149,30 @@ def ensure_uploaded(self, ctx, repo_f=None): def upload_to_repo(self, repo): return repo.upload_rpm(self.pushsource_item.content(), cdn_path=self.cdn_path) + + def fail_if_duplicate(self, pulp_client): + """ + Check for duplicate units in Pulp. If duplicate is found, raise an error. + + This is a workaround until we have a proper support for units with same NVR but different checksum in Pulp. + Such units are not allowed to be uploaded to Pulp as they would break the integrity of the repository when published. + """ + # import pdb; pdb.set_trace() + n, v, r, a = self.rpm_nvra + crit = Criteria.and_( + Criteria.with_unit_type(RpmUnit), + Criteria.and_( + Criteria.with_field("name", n), + Criteria.with_field("version", v), + Criteria.with_field("release", r), + Criteria.with_field("arch", a), + ), + ) + + result = pulp_client.search_content(crit).result() + for item in result: + msg = "Fatal error: Duplicate RPM present in Pulp: %s, sha256: %s" % ( + item.filename, + item.sha256sum, + ) + raise RuntimeError(msg) diff --git a/src/pubtools/_pulp/tasks/push/phase/constants.py b/src/pubtools/_pulp/tasks/push/phase/constants.py index 8c713d51..36490049 100644 --- a/src/pubtools/_pulp/tasks/push/phase/constants.py +++ b/src/pubtools/_pulp/tasks/push/phase/constants.py @@ -112,3 +112,8 @@ def atom(name): OUT_MAX_FUTURES = int(os.getenv("PUBTOOLS_PULP_OUT_MAX_FUTURES") or "10") """Max number of pending futures in output buffer.""" + +# Special workaround for allowing duplicate units to be uploaded to Pulp. +ALLOW_DUPLICATE_UNITS = os.getenv( + "PUBTOOLS_PULP_ALLOW_DUPLICATE_UNITS", "True" +).strip().lower() in ("true", "1", "yes", "y") diff --git a/src/pubtools/_pulp/tasks/push/phase/upload.py b/src/pubtools/_pulp/tasks/push/phase/upload.py index 3be6772e..a3f6cbac 100644 --- a/src/pubtools/_pulp/tasks/push/phase/upload.py +++ b/src/pubtools/_pulp/tasks/push/phase/upload.py @@ -1,6 +1,9 @@ +import os import logging + from .base import Phase +from . import constants from ..items import State LOG = logging.getLogger("pubtools.pulp") @@ -59,6 +62,13 @@ def run(self): else: # This item is not in Pulp, or otherwise needs a reupload. item_type = type(item) + + if ( + not constants.ALLOW_DUPLICATE_UNITS + and item.pulp_state == State.MISSING + ): + item.fail_if_duplicate(self.pulp_client) + if item.MULTI_UPLOAD_CONTEXT: if item_type not in upload_context: upload_context[item_type] = {} diff --git a/tests/push/test_association_race.py b/tests/push/test_association_race.py index d7018129..5daf9811 100644 --- a/tests/push/test_association_race.py +++ b/tests/push/test_association_race.py @@ -43,7 +43,7 @@ def new_iter(self): indices = [ x[0] for x in enumerate(batch) - if getattr(x[1], "rpm_nvr", ()) == ("walrus", "5.21", "1") + if getattr(x[1], "rpm_nvra", ()) == ("walrus", "5.21", "1", "noarch") ] # modify (each) walrus package, so that it is presumed to be in # an incorrect repository diff --git a/tests/push/test_push_rpm_duplicate_fail.py b/tests/push/test_push_rpm_duplicate_fail.py new file mode 100644 index 00000000..f83fcb94 --- /dev/null +++ b/tests/push/test_push_rpm_duplicate_fail.py @@ -0,0 +1,75 @@ +import functools + +import pytest + +from pushsource import Source, RpmPushItem +from pubtools.pulplib import RpmUnit + +from pubtools._pulp.tasks.push import entry_point +from pubtools._pulp.tasks.push.phase import constants + + +def test_push_rpm_duplicate_fail( + fake_controller, fake_push, command_tester, caplog, monkeypatch +): + """Test that push detects and fails in the case where a RPM with the same NVR but different checksum is pushed to Pulp.""" + monkeypatch.setattr(constants, "ALLOW_DUPLICATE_UNITS", False) + + client = fake_controller.client + + rpm_dest = client.get_repository("dest1").result() + + # Make this file exist but not in all the desired repos. + existing_rpm = RpmUnit( + name="some-rpm", + version="1.0.0", + release="1", + arch="noarch", + sha256sum="db68c8a70f8383de71c107dca5fcfe53b1132186d1a6681d9ee3f4eea724fabb", + filename="some-rpm-1.0.0-1.noarch.rpm", + ) + fake_controller.insert_units(rpm_dest, [existing_rpm]) + + # Unit is now in rpm-dest. + # Set up a pushsource backend which requests push of the RPM with the same NVR but different checksum + Source.register_backend( + "test", + lambda: [ + RpmPushItem( + name="some-rpm-1.0.0-1.noarch.rpm", + dest=["dest1"], + sha256sum="e823456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + md5sum=32 * "a", + src="fake/path", + ), + ], + ) + + args = [ + "", + "--source", + "test:", + "--pulp-url", + "https://pulp.example.com/", + "--allow-unsigned", + ] + + run = functools.partial(entry_point, cls=lambda: fake_push) + + # Ask it to push. + with pytest.raises(SystemExit) as excinfo: + command_tester.test( + run, + args, + # Can't guarantee a stable log order. + compare_plaintext=False, + compare_jsonl=False, + ) + + # It should have failed. + assert excinfo.value.code == 59 + + # It should tell us why it failed. + msg = "Duplicate RPM present in Pulp: some-rpm-1.0.0-1.noarch.rpm, sha256: db68c8a70f8383de71c107dca5fcfe53b1132186d1a6681d9ee3f4eea724fabb" + + assert msg in caplog.text diff --git a/tests/push/test_rpm_bad_filename.py b/tests/push/test_rpm_bad_filename.py index 78dc0871..f8fd3f0b 100644 --- a/tests/push/test_rpm_bad_filename.py +++ b/tests/push/test_rpm_bad_filename.py @@ -6,7 +6,7 @@ def test_rpm_bad_filename(): - """rpm_nvr raises a meaningful error when NVR can't be parsed.""" + """rpm_nvra raises a meaningful error when NVR can't be parsed.""" # This RPM's filename doesn't match the NVR.A.rpm convention # per http://ftp.rpm.org/max-rpm/ch-rpm-file-format.html @@ -14,7 +14,7 @@ def test_rpm_bad_filename(): # We should not be able to retrieve the NVR. with pytest.raises(ValueError) as excinfo: - item.rpm_nvr + item.rpm_nvra # The exception should tell us why. assert (