From cab454db72768015718d9c65b3d4ed9678b3f90a Mon Sep 17 00:00:00 2001 From: Robert Bikar Date: Fri, 5 Jun 2026 10:26:22 +0200 Subject: [PATCH 1/2] Disallow RPM upload with the same NVR [RHELDST-37824] This change introduces a functionality that will raise a fatal error when upload of an RPM with NVR of already present unit in pulp. The RPMs in question differ only with their checksums (and signing keys), further association to live repositories and publish will cause breakage of repository. Duplicates check is by default disabled, and can be enabled by setting PUBTOOLS_PULP_ALLOW_DUPLICATE_UNITS env var. This should be a workaround until proper suport for sucvh scenario is implemented. --- src/pubtools/_pulp/tasks/push/items/base.py | 4 + src/pubtools/_pulp/tasks/push/items/rpm.py | 48 +++++++++-- .../_pulp/tasks/push/phase/constants.py | 5 ++ src/pubtools/_pulp/tasks/push/phase/upload.py | 9 +++ tests/push/test_association_race.py | 2 +- tests/push/test_push_rpm_duplicate_fail.py | 80 +++++++++++++++++++ tests/push/test_rpm_bad_filename.py | 4 +- 7 files changed, 141 insertions(+), 11 deletions(-) create mode 100644 tests/push/test_push_rpm_duplicate_fail.py diff --git a/src/pubtools/_pulp/tasks/push/items/base.py b/src/pubtools/_pulp/tasks/push/items/base.py index ae48d2bb..362505ad 100644 --- a/src/pubtools/_pulp/tasks/push/items/base.py +++ b/src/pubtools/_pulp/tasks/push/items/base.py @@ -645,3 +645,7 @@ def upload_key(self): content types which can safely reuse uploads. """ return None + + def fail_if_duplicate(self, pulp_client): # pylint:disable=unused-argument + """Subclasses may override this method to check for duplicates and fail if found.""" + return None diff --git a/src/pubtools/_pulp/tasks/push/items/rpm.py b/src/pubtools/_pulp/tasks/push/items/rpm.py index 9fa69294..6412a864 100644 --- a/src/pubtools/_pulp/tasks/push/items/rpm.py +++ b/src/pubtools/_pulp/tasks/push/items/rpm.py @@ -1,4 +1,5 @@ import os +import logging from pushsource import RpmPushItem import attr @@ -6,6 +7,8 @@ from .base import supports_type, PulpPushItem, UploadContext +LOG = logging.getLogger("pubtools.pulp") + @attr.s(frozen=True, slots=True) class RpmUploadContext(UploadContext): @@ -36,8 +39,8 @@ def unit_type(self): return RpmUnit @property - def rpm_nvr(self): - # (n, v, r) tuple derived from filename, used in cdn_path calculation + def rpm_nvra(self): + # (n, v, r, a) tuple derived from filename, used in cdn_path calculation # Filename convention can be found at: # http://ftp.rpm.org/max-rpm/ch-rpm-file-format.html @@ -49,16 +52,18 @@ def rpm_nvr(self): # mpr.hcraon.1.1.3_7le.41-0.4.4-slootnimda-api filename_rev = "".join(reversed(filename)) - # 1.1.3_7le.41-0.4.4-slootnimda-api - nvr_rev = filename_rev.split(".", 2)[2] + # (hcraon, 1.1.3_7le.41-0.4.4-slootnimda-api) + arch_rev, nvr_rev = filename_rev.split(".", 2)[1:3] # ('1.1.3_7le.41', '0.4.4', 'slootnimda-api') components_revrev = nvr_rev.split("-", 2) - # ['14.el7_3.1.1', '4.4.0', 'ipa-admintools'] - components_rev = ["".join(reversed(c)) for c in components_revrev] + # ['noarch', '14.el7_3.1.1', '4.4.0', 'ipa-admintools'] + components_rev = [ + "".join(reversed(c)) for c in [arch_rev] + components_revrev + ] - # ('ipa-admintools', '4.4.0', '14.el7_3.1.1') + # ('ipa-admintools', '4.4.0', '14.el7_3.1.1', 'noarch') return tuple(reversed(components_rev)) except Exception as exc: # pylint: disable=broad-except # Crashes above may be a bit hard to understand, so we raise with @@ -71,7 +76,7 @@ def rpm_nvr(self): @property def cdn_path(self): """Desired value of RpmUnit.cdn_path field.""" - n, v, r = self.rpm_nvr + n, v, r, _ = self.rpm_nvra return os.path.join( "/content/origin/rpms", n, @@ -144,3 +149,30 @@ def ensure_uploaded(self, ctx, repo_f=None): def upload_to_repo(self, repo): return repo.upload_rpm(self.pushsource_item.content(), cdn_path=self.cdn_path) + + def fail_if_duplicate(self, pulp_client): + """ + Check for duplicate units in Pulp. If duplicate is found, raise an error. + + This is a workaround until we have a proper support for units with same NVR but different checksum in Pulp. + Such units are not allowed to be uploaded to Pulp as they would break the integrity of the repository when published. + """ + #import pdb; pdb.set_trace() + n, v, r, a = self.rpm_nvra + crit = Criteria.and_( + Criteria.with_unit_type(RpmUnit), + Criteria.and_( + Criteria.with_field("name", n), + Criteria.with_field("version", v), + Criteria.with_field("release", r), + Criteria.with_field("arch", a), + ), + ) + + result = pulp_client.search_content(crit).result() + for item in result: + msg = ( + "Fatal error: Duplicate RPM present in Pulp: %s, sha256: %s" % + (item.filename, item.sha256sum) + ) + raise RuntimeError(msg) diff --git a/src/pubtools/_pulp/tasks/push/phase/constants.py b/src/pubtools/_pulp/tasks/push/phase/constants.py index 8c713d51..36490049 100644 --- a/src/pubtools/_pulp/tasks/push/phase/constants.py +++ b/src/pubtools/_pulp/tasks/push/phase/constants.py @@ -112,3 +112,8 @@ def atom(name): OUT_MAX_FUTURES = int(os.getenv("PUBTOOLS_PULP_OUT_MAX_FUTURES") or "10") """Max number of pending futures in output buffer.""" + +# Special workaround for allowing duplicate units to be uploaded to Pulp. +ALLOW_DUPLICATE_UNITS = os.getenv( + "PUBTOOLS_PULP_ALLOW_DUPLICATE_UNITS", "True" +).strip().lower() in ("true", "1", "yes", "y") diff --git a/src/pubtools/_pulp/tasks/push/phase/upload.py b/src/pubtools/_pulp/tasks/push/phase/upload.py index 3be6772e..4d7b4484 100644 --- a/src/pubtools/_pulp/tasks/push/phase/upload.py +++ b/src/pubtools/_pulp/tasks/push/phase/upload.py @@ -1,8 +1,13 @@ +import os import logging + from .base import Phase +from . import constants from ..items import State + + LOG = logging.getLogger("pubtools.pulp") @@ -59,6 +64,10 @@ def run(self): else: # This item is not in Pulp, or otherwise needs a reupload. item_type = type(item) + + if not constants.ALLOW_DUPLICATE_UNITS and item.pulp_state == State.MISSING: + item.fail_if_duplicate(self.pulp_client) + if item.MULTI_UPLOAD_CONTEXT: if item_type not in upload_context: upload_context[item_type] = {} diff --git a/tests/push/test_association_race.py b/tests/push/test_association_race.py index d7018129..5daf9811 100644 --- a/tests/push/test_association_race.py +++ b/tests/push/test_association_race.py @@ -43,7 +43,7 @@ def new_iter(self): indices = [ x[0] for x in enumerate(batch) - if getattr(x[1], "rpm_nvr", ()) == ("walrus", "5.21", "1") + if getattr(x[1], "rpm_nvra", ()) == ("walrus", "5.21", "1", "noarch") ] # modify (each) walrus package, so that it is presumed to be in # an incorrect repository diff --git a/tests/push/test_push_rpm_duplicate_fail.py b/tests/push/test_push_rpm_duplicate_fail.py new file mode 100644 index 00000000..38ad6733 --- /dev/null +++ b/tests/push/test_push_rpm_duplicate_fail.py @@ -0,0 +1,80 @@ +import functools + +import pytest + +from pushsource import Source, RpmPushItem +from pubtools.pulplib import RpmUnit + +from pubtools._pulp.tasks.push import entry_point +from pubtools._pulp.tasks.push.phase import constants + + +def test_push_rpm_duplicate_fail( + fake_controller, fake_push, command_tester, caplog, monkeypatch +): + """Test that push detects and fails in the case where a RPM with the same NVR but different checksum is pushed to Pulp.""" + monkeypatch.setattr(constants, "ALLOW_DUPLICATE_UNITS", False) + + client = fake_controller.client + + rpm_dest = client.get_repository("dest1").result() + + # Make this file exist but not in all the desired repos. + existing_rpm = RpmUnit( + name="some-rpm", + version="1.0.0", + release="1", + arch="noarch", + sha256sum="db68c8a70f8383de71c107dca5fcfe53b1132186d1a6681d9ee3f4eea724fabb", + filename="some-rpm-1.0.0-1.noarch.rpm", + ) + fake_controller.insert_units(rpm_dest, [existing_rpm]) + + # Unit is now in rpm-dest. + # Set up a pushsource backend which requests push of the RPM with the same NVR but different checksum + Source.register_backend( + "test", + lambda: [ + RpmPushItem( + name="some-rpm-1.0.0-1.noarch.rpm", + dest=["dest1"], + sha256sum="e823456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + md5sum=32*"a", + src="fake/path" + ), + ], + + ) + + args = [ + "", + "--source", + "test:", + "--pulp-url", + "https://pulp.example.com/", + "--allow-unsigned", + + ] + + + run = functools.partial(entry_point, cls=lambda: fake_push) + + # Ask it to push. + with pytest.raises(SystemExit) as excinfo: + command_tester.test( + run, + args, + # Can't guarantee a stable log order. + compare_plaintext=False, + compare_jsonl=False, + ) + + # It should have failed. + assert excinfo.value.code == 59 + + # It should tell us why it failed. + msg = ( + "Duplicate RPM present in Pulp: some-rpm-1.0.0-1.noarch.rpm, sha256: db68c8a70f8383de71c107dca5fcfe53b1132186d1a6681d9ee3f4eea724fabb" + ) + + assert msg in caplog.text diff --git a/tests/push/test_rpm_bad_filename.py b/tests/push/test_rpm_bad_filename.py index 78dc0871..f8fd3f0b 100644 --- a/tests/push/test_rpm_bad_filename.py +++ b/tests/push/test_rpm_bad_filename.py @@ -6,7 +6,7 @@ def test_rpm_bad_filename(): - """rpm_nvr raises a meaningful error when NVR can't be parsed.""" + """rpm_nvra raises a meaningful error when NVR can't be parsed.""" # This RPM's filename doesn't match the NVR.A.rpm convention # per http://ftp.rpm.org/max-rpm/ch-rpm-file-format.html @@ -14,7 +14,7 @@ def test_rpm_bad_filename(): # We should not be able to retrieve the NVR. with pytest.raises(ValueError) as excinfo: - item.rpm_nvr + item.rpm_nvra # The exception should tell us why. assert ( From c08da81146f007646daa1cd758d9dec27525c3f8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:29:12 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pubtools/_pulp/tasks/push/items/base.py | 2 +- src/pubtools/_pulp/tasks/push/items/rpm.py | 8 ++++---- src/pubtools/_pulp/tasks/push/phase/upload.py | 9 +++++---- tests/push/test_push_rpm_duplicate_fail.py | 11 +++-------- 4 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/pubtools/_pulp/tasks/push/items/base.py b/src/pubtools/_pulp/tasks/push/items/base.py index 362505ad..0b55b2a0 100644 --- a/src/pubtools/_pulp/tasks/push/items/base.py +++ b/src/pubtools/_pulp/tasks/push/items/base.py @@ -646,6 +646,6 @@ def upload_key(self): """ return None - def fail_if_duplicate(self, pulp_client): # pylint:disable=unused-argument + def fail_if_duplicate(self, pulp_client): # pylint:disable=unused-argument """Subclasses may override this method to check for duplicates and fail if found.""" return None diff --git a/src/pubtools/_pulp/tasks/push/items/rpm.py b/src/pubtools/_pulp/tasks/push/items/rpm.py index 6412a864..17d440c7 100644 --- a/src/pubtools/_pulp/tasks/push/items/rpm.py +++ b/src/pubtools/_pulp/tasks/push/items/rpm.py @@ -157,7 +157,7 @@ def fail_if_duplicate(self, pulp_client): This is a workaround until we have a proper support for units with same NVR but different checksum in Pulp. Such units are not allowed to be uploaded to Pulp as they would break the integrity of the repository when published. """ - #import pdb; pdb.set_trace() + # import pdb; pdb.set_trace() n, v, r, a = self.rpm_nvra crit = Criteria.and_( Criteria.with_unit_type(RpmUnit), @@ -171,8 +171,8 @@ def fail_if_duplicate(self, pulp_client): result = pulp_client.search_content(crit).result() for item in result: - msg = ( - "Fatal error: Duplicate RPM present in Pulp: %s, sha256: %s" % - (item.filename, item.sha256sum) + msg = "Fatal error: Duplicate RPM present in Pulp: %s, sha256: %s" % ( + item.filename, + item.sha256sum, ) raise RuntimeError(msg) diff --git a/src/pubtools/_pulp/tasks/push/phase/upload.py b/src/pubtools/_pulp/tasks/push/phase/upload.py index 4d7b4484..a3f6cbac 100644 --- a/src/pubtools/_pulp/tasks/push/phase/upload.py +++ b/src/pubtools/_pulp/tasks/push/phase/upload.py @@ -6,8 +6,6 @@ from . import constants from ..items import State - - LOG = logging.getLogger("pubtools.pulp") @@ -64,8 +62,11 @@ def run(self): else: # This item is not in Pulp, or otherwise needs a reupload. item_type = type(item) - - if not constants.ALLOW_DUPLICATE_UNITS and item.pulp_state == State.MISSING: + + if ( + not constants.ALLOW_DUPLICATE_UNITS + and item.pulp_state == State.MISSING + ): item.fail_if_duplicate(self.pulp_client) if item.MULTI_UPLOAD_CONTEXT: diff --git a/tests/push/test_push_rpm_duplicate_fail.py b/tests/push/test_push_rpm_duplicate_fail.py index 38ad6733..f83fcb94 100644 --- a/tests/push/test_push_rpm_duplicate_fail.py +++ b/tests/push/test_push_rpm_duplicate_fail.py @@ -39,11 +39,10 @@ def test_push_rpm_duplicate_fail( name="some-rpm-1.0.0-1.noarch.rpm", dest=["dest1"], sha256sum="e823456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", - md5sum=32*"a", - src="fake/path" + md5sum=32 * "a", + src="fake/path", ), ], - ) args = [ @@ -53,10 +52,8 @@ def test_push_rpm_duplicate_fail( "--pulp-url", "https://pulp.example.com/", "--allow-unsigned", - ] - run = functools.partial(entry_point, cls=lambda: fake_push) # Ask it to push. @@ -73,8 +70,6 @@ def test_push_rpm_duplicate_fail( assert excinfo.value.code == 59 # It should tell us why it failed. - msg = ( - "Duplicate RPM present in Pulp: some-rpm-1.0.0-1.noarch.rpm, sha256: db68c8a70f8383de71c107dca5fcfe53b1132186d1a6681d9ee3f4eea724fabb" - ) + msg = "Duplicate RPM present in Pulp: some-rpm-1.0.0-1.noarch.rpm, sha256: db68c8a70f8383de71c107dca5fcfe53b1132186d1a6681d9ee3f4eea724fabb" assert msg in caplog.text