Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/pubtools/_pulp/tasks/push/items/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,3 +645,7 @@ def upload_key(self):
content types which can safely reuse uploads.
"""
return None

def fail_if_duplicate(self, pulp_client): # pylint:disable=unused-argument
"""Subclasses may override this method to check for duplicates and fail if found."""
return None
48 changes: 40 additions & 8 deletions src/pubtools/_pulp/tasks/push/items/rpm.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import os
import logging

from pushsource import RpmPushItem
import attr
from pubtools.pulplib import RpmUnit, Criteria

from .base import supports_type, PulpPushItem, UploadContext

LOG = logging.getLogger("pubtools.pulp")


@attr.s(frozen=True, slots=True)
class RpmUploadContext(UploadContext):
Expand Down Expand Up @@ -36,8 +39,8 @@ def unit_type(self):
return RpmUnit

@property
def rpm_nvr(self):
# (n, v, r) tuple derived from filename, used in cdn_path calculation
def rpm_nvra(self):
# (n, v, r, a) tuple derived from filename, used in cdn_path calculation

# Filename convention can be found at:
# http://ftp.rpm.org/max-rpm/ch-rpm-file-format.html
Expand All @@ -49,16 +52,18 @@ def rpm_nvr(self):
# mpr.hcraon.1.1.3_7le.41-0.4.4-slootnimda-api
filename_rev = "".join(reversed(filename))

# 1.1.3_7le.41-0.4.4-slootnimda-api
nvr_rev = filename_rev.split(".", 2)[2]
# (hcraon, 1.1.3_7le.41-0.4.4-slootnimda-api)
arch_rev, nvr_rev = filename_rev.split(".", 2)[1:3]

# ('1.1.3_7le.41', '0.4.4', 'slootnimda-api')
components_revrev = nvr_rev.split("-", 2)

# ['14.el7_3.1.1', '4.4.0', 'ipa-admintools']
components_rev = ["".join(reversed(c)) for c in components_revrev]
# ['noarch', '14.el7_3.1.1', '4.4.0', 'ipa-admintools']
components_rev = [
"".join(reversed(c)) for c in [arch_rev] + components_revrev
]

# ('ipa-admintools', '4.4.0', '14.el7_3.1.1')
# ('ipa-admintools', '4.4.0', '14.el7_3.1.1', 'noarch')
return tuple(reversed(components_rev))
except Exception as exc: # pylint: disable=broad-except
# Crashes above may be a bit hard to understand, so we raise with
Expand All @@ -71,7 +76,7 @@ def rpm_nvr(self):
@property
def cdn_path(self):
"""Desired value of RpmUnit.cdn_path field."""
n, v, r = self.rpm_nvr
n, v, r, _ = self.rpm_nvra
return os.path.join(
"/content/origin/rpms",
n,
Expand Down Expand Up @@ -144,3 +149,30 @@ def ensure_uploaded(self, ctx, repo_f=None):

def upload_to_repo(self, repo):
return repo.upload_rpm(self.pushsource_item.content(), cdn_path=self.cdn_path)

def fail_if_duplicate(self, pulp_client):
"""
Check for duplicate units in Pulp. If duplicate is found, raise an error.

This is a workaround until we have a proper support for units with same NVR but different checksum in Pulp.
Such units are not allowed to be uploaded to Pulp as they would break the integrity of the repository when published.
"""
# import pdb; pdb.set_trace()
n, v, r, a = self.rpm_nvra
crit = Criteria.and_(
Criteria.with_unit_type(RpmUnit),
Criteria.and_(
Criteria.with_field("name", n),
Criteria.with_field("version", v),
Criteria.with_field("release", r),
Criteria.with_field("arch", a),
),
)

result = pulp_client.search_content(crit).result()
for item in result:
msg = "Fatal error: Duplicate RPM present in Pulp: %s, sha256: %s" % (
item.filename,
item.sha256sum,
)
raise RuntimeError(msg)
5 changes: 5 additions & 0 deletions src/pubtools/_pulp/tasks/push/phase/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,8 @@ def atom(name):

OUT_MAX_FUTURES = int(os.getenv("PUBTOOLS_PULP_OUT_MAX_FUTURES") or "10")
"""Max number of pending futures in output buffer."""

# Special workaround for allowing duplicate units to be uploaded to Pulp.
ALLOW_DUPLICATE_UNITS = os.getenv(
"PUBTOOLS_PULP_ALLOW_DUPLICATE_UNITS", "True"
).strip().lower() in ("true", "1", "yes", "y")
10 changes: 10 additions & 0 deletions src/pubtools/_pulp/tasks/push/phase/upload.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import os
import logging


from .base import Phase
from . import constants
from ..items import State

LOG = logging.getLogger("pubtools.pulp")
Expand Down Expand Up @@ -59,6 +62,13 @@ def run(self):
else:
# This item is not in Pulp, or otherwise needs a reupload.
item_type = type(item)

if (
not constants.ALLOW_DUPLICATE_UNITS
and item.pulp_state == State.MISSING
):
item.fail_if_duplicate(self.pulp_client)

if item.MULTI_UPLOAD_CONTEXT:
if item_type not in upload_context:
upload_context[item_type] = {}
Expand Down
2 changes: 1 addition & 1 deletion tests/push/test_association_race.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def new_iter(self):
indices = [
x[0]
for x in enumerate(batch)
if getattr(x[1], "rpm_nvr", ()) == ("walrus", "5.21", "1")
if getattr(x[1], "rpm_nvra", ()) == ("walrus", "5.21", "1", "noarch")
]
# modify (each) walrus package, so that it is presumed to be in
# an incorrect repository
Expand Down
75 changes: 75 additions & 0 deletions tests/push/test_push_rpm_duplicate_fail.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import functools

import pytest

from pushsource import Source, RpmPushItem
from pubtools.pulplib import RpmUnit

from pubtools._pulp.tasks.push import entry_point
from pubtools._pulp.tasks.push.phase import constants


def test_push_rpm_duplicate_fail(
fake_controller, fake_push, command_tester, caplog, monkeypatch
):
"""Test that push detects and fails in the case where a RPM with the same NVR but different checksum is pushed to Pulp."""
monkeypatch.setattr(constants, "ALLOW_DUPLICATE_UNITS", False)

client = fake_controller.client

rpm_dest = client.get_repository("dest1").result()

# Make this file exist but not in all the desired repos.
existing_rpm = RpmUnit(
name="some-rpm",
version="1.0.0",
release="1",
arch="noarch",
sha256sum="db68c8a70f8383de71c107dca5fcfe53b1132186d1a6681d9ee3f4eea724fabb",
filename="some-rpm-1.0.0-1.noarch.rpm",
)
fake_controller.insert_units(rpm_dest, [existing_rpm])

# Unit is now in rpm-dest.
# Set up a pushsource backend which requests push of the RPM with the same NVR but different checksum
Source.register_backend(
"test",
lambda: [
RpmPushItem(
name="some-rpm-1.0.0-1.noarch.rpm",
dest=["dest1"],
sha256sum="e823456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
md5sum=32 * "a",
src="fake/path",
),
],
)

args = [
"",
"--source",
"test:",
"--pulp-url",
"https://pulp.example.com/",
"--allow-unsigned",
]

run = functools.partial(entry_point, cls=lambda: fake_push)

# Ask it to push.
with pytest.raises(SystemExit) as excinfo:
command_tester.test(
run,
args,
# Can't guarantee a stable log order.
compare_plaintext=False,
compare_jsonl=False,
)

# It should have failed.
assert excinfo.value.code == 59

# It should tell us why it failed.
msg = "Duplicate RPM present in Pulp: some-rpm-1.0.0-1.noarch.rpm, sha256: db68c8a70f8383de71c107dca5fcfe53b1132186d1a6681d9ee3f4eea724fabb"

assert msg in caplog.text
4 changes: 2 additions & 2 deletions tests/push/test_rpm_bad_filename.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@


def test_rpm_bad_filename():
"""rpm_nvr raises a meaningful error when NVR can't be parsed."""
"""rpm_nvra raises a meaningful error when NVR can't be parsed."""

# This RPM's filename doesn't match the NVR.A.rpm convention
# per http://ftp.rpm.org/max-rpm/ch-rpm-file-format.html
item = PulpRpmPushItem(pushsource_item=RpmPushItem(name="my-badlynamed.rpm"))

# We should not be able to retrieve the NVR.
with pytest.raises(ValueError) as excinfo:
item.rpm_nvr
item.rpm_nvra

# The exception should tell us why.
assert (
Expand Down
Loading