diff --git a/pulp_python/app/migrations/0016_pythonpackagecontent_sha256_metadata_and_more.py b/pulp_python/app/migrations/0016_pythonpackagecontent_sha256_metadata_and_more.py
new file mode 100644
index 00000000..e43a7b41
--- /dev/null
+++ b/pulp_python/app/migrations/0016_pythonpackagecontent_sha256_metadata_and_more.py
@@ -0,0 +1,30 @@
+# Generated by Django 4.2.24 on 2025-09-19 11:10
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("python", "0015_alter_pythonpackagecontent_options"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="pythonpackagecontent",
+            name="metadata_sha256",
+            field=models.CharField(null=True, max_length=64),
+            preserve_default=False,
+        ),
+        migrations.AddField(
+            model_name="pythonpackagecontent",
+            name="yanked",
+            field=models.BooleanField(default=False),
+        ),
+        migrations.AddField(
+            model_name="pythonpackagecontent",
+            name="yanked_reason",
+            field=models.TextField(default=""),
+            preserve_default=False,
+        ),
+    ]
diff --git a/pulp_python/app/models.py b/pulp_python/app/models.py
index 3bd9d605..3649134f 100644
--- a/pulp_python/app/models.py
+++ b/pulp_python/app/models.py
@@ -192,6 +192,9 @@ class PythonPackageContent(Content):
     packagetype = models.TextField(choices=PACKAGE_TYPES)
     python_version = models.TextField()
     sha256 = models.CharField(db_index=True, max_length=64)
+    metadata_sha256 = models.CharField(max_length=64, null=True)
+    yanked = models.BooleanField(default=False)
+    yanked_reason = models.TextField()
 
     # From pulpcore
     PROTECTED_FROM_RECLAIM = False
diff --git a/pulp_python/app/pypi/views.py b/pulp_python/app/pypi/views.py
index bd8bc2af..c68f6ecb 100644
--- a/pulp_python/app/pypi/views.py
+++ b/pulp_python/app/pypi/views.py
@@ -3,7 +3,9 @@
 
 from aiohttp.client_exceptions import ClientError
 from rest_framework.viewsets import ViewSet
+from rest_framework.renderers import BrowsableAPIRenderer, JSONRenderer, TemplateHTMLRenderer
 from rest_framework.response import Response
+from rest_framework.exceptions import NotAcceptable
 from django.core.exceptions import ObjectDoesNotExist
 from django.shortcuts import redirect
 from datetime import datetime, timezone, timedelta
@@ -43,7 +45,9 @@
 )
 from pulp_python.app.utils import (
     write_simple_index,
+    write_simple_index_json,
     write_simple_detail,
+    write_simple_detail_json,
     python_content_to_json,
     PYPI_LAST_SERIAL,
     PYPI_SERIAL_CONSTANT,
@@ -57,6 +61,17 @@
 ORIGIN_HOST = settings.CONTENT_ORIGIN if settings.CONTENT_ORIGIN else settings.PYPI_API_HOSTNAME
 BASE_CONTENT_URL = urljoin(ORIGIN_HOST, settings.CONTENT_PATH_PREFIX)
 
+PYPI_SIMPLE_V1_HTML = "application/vnd.pypi.simple.v1+html"
+PYPI_SIMPLE_V1_JSON = "application/vnd.pypi.simple.v1+json"
+
+
+class PyPISimpleHTMLRenderer(TemplateHTMLRenderer):
+    media_type = PYPI_SIMPLE_V1_HTML
+
+
+class PyPISimpleJSONRenderer(JSONRenderer):
+    media_type = PYPI_SIMPLE_V1_JSON
+
 
 class PyPIMixin:
     """Mixin to get index specific info."""
@@ -235,6 +250,25 @@ class SimpleView(PackageUploadMixin, ViewSet):
         ],
     }
 
+    def perform_content_negotiation(self, request, force=False):
+        """
+        Negotiate the renderer as usual; if none is acceptable for the request, fall back to HTML.
+        """
+        try:
+            return super().perform_content_negotiation(request, force)
+        except NotAcceptable:
+            return TemplateHTMLRenderer(), TemplateHTMLRenderer.media_type  # text/html
+
+    def get_renderers(self):
+        """
+        Return the Simple API renderers for list/retrieve, JSON and browsable renderers otherwise.
+        """
+        if self.action in ["list", "retrieve"]:
+            # Priority order: applies when a request accepts more than one of these media types
+            return [TemplateHTMLRenderer(), PyPISimpleHTMLRenderer(), PyPISimpleJSONRenderer()]
+        else:
+            return [JSONRenderer(), BrowsableAPIRenderer()]
+
     @extend_schema(summary="Get index simple page")
     def list(self, request, path):
         """Gets the simple api html page for the index."""
@@ -242,7 +276,16 @@ def list(self, request, path):
         if self.should_redirect(repo_version=repo_version):
             return redirect(urljoin(self.base_content_url, f"{path}/simple/"))
         names = content.order_by("name").values_list("name", flat=True).distinct().iterator()
-        return StreamingHttpResponse(write_simple_index(names, streamed=True))
+        media_type = request.accepted_renderer.media_type
+
+        if media_type == PYPI_SIMPLE_V1_JSON:
+            index_data = write_simple_index_json(names)
+            headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)}
+            return Response(index_data, headers=headers)
+        else:
+            index_data = write_simple_index(names, streamed=True)
+            kwargs = {"content_type": media_type}
+            return StreamingHttpResponse(index_data, **kwargs)
 
     def pull_through_package_simple(self, package, path, remote):
         """Gets the package's simple page from remote."""
@@ -252,7 +295,12 @@ def parse_package(release_package):
             stripped_url = urlunsplit(chain(parsed[:3], ("", "")))
             redirect_path = f"{path}/{release_package.filename}?redirect={stripped_url}"
             d_url = urljoin(self.base_content_url, redirect_path)
-            return release_package.filename, d_url, release_package.digests.get("sha256", "")
+            return {
+                "filename": release_package.filename,
+                "url": d_url,
+                "sha256": release_package.digests.get("sha256", ""),
+                # TODO: decide which other fields (requires_python, yanked, ...) to pass through
+            }
 
         rfilter = get_remote_package_filter(remote)
         if not rfilter.filter_project(package):
@@ -269,7 +317,7 @@ def parse_package(release_package):
         except TimeoutException:
             return HttpResponse(f"{remote.url} timed out while fetching {package}.", status=504)
 
-        if d.headers["content-type"] == "application/vnd.pypi.simple.v1+json":
+        if d.headers["content-type"] == PYPI_SIMPLE_V1_JSON:
             page = ProjectPage.from_json_data(json.load(open(d.path, "rb")), base_url=url)
         else:
             page = ProjectPage.from_html(package, open(d.path, "rb").read(), base_url=url)
@@ -290,7 +338,15 @@ def retrieve(self, request, path, package):
             return redirect(urljoin(self.base_content_url, f"{path}/simple/{normalized}/"))
         packages = (
             content.filter(name__normalize=normalized)
-            .values_list("filename", "sha256", "name")
+            .values_list(
+                "filename",
+                "sha256",
+                "name",
+                "metadata_sha256",
+                "requires_python",
+                "yanked",
+                "yanked_reason",
+            )
             .iterator()
         )
         try:
@@ -300,8 +356,28 @@ def retrieve(self, request, path, package):
         else:
             packages = chain([present], packages)
             name = present[2]
-        releases = ((f, urljoin(self.base_content_url, f"{path}/{f}"), d) for f, d, _ in packages)
-        return StreamingHttpResponse(write_simple_detail(name, releases, streamed=True))
+        releases = (
+            {
+                "filename": f,
+                "url": urljoin(self.base_content_url, f"{path}/{f}"),
+                "sha256": s,
+                "metadata_sha256": sm,
+                "requires_python": rp,
+                "yanked": y,
+                "yanked_reason": yr,
+            }
+            for f, s, _, sm, rp, y, yr in packages
+        )
+        media_type = request.accepted_renderer.media_type
+
+        if media_type == PYPI_SIMPLE_V1_JSON:
+            detail_data = write_simple_detail_json(name, releases)
+            headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)}
+            return Response(detail_data, headers=headers)
+        else:
+            detail_data = write_simple_detail(name, releases, streamed=True)
+            kwargs = {"content_type": media_type}
+            return StreamingHttpResponse(detail_data, **kwargs)  # unpack: content_type is a keyword
 
     @extend_schema(
         request=PackageUploadSerializer,
diff --git 
a/pulp_python/app/utils.py b/pulp_python/app/utils.py index 533caba8..879ec65b 100644 --- a/pulp_python/app/utils.py +++ b/pulp_python/app/utils.py @@ -16,11 +16,13 @@ """TODO This serial constant is temporary until Python repositories implements serials""" PYPI_SERIAL_CONSTANT = 1000000000 +SIMPLE_API_VERSION = "1.0" + simple_index_template = """ Simple Index - + {% for name, canonical_name in projects %} @@ -30,16 +32,17 @@ """ +# noqa: E501 simple_detail_template = """ Links for {{ project_name }} - +

Links for {{ project_name }}

- {% for name, path, sha256 in project_packages %} - {{ name }}
+ {% for pkg in project_packages %} + {{ pkg.filename }}{% if pkg.yanked %}data-yanked="{{ pkg.yanked_reason }}"{% endif %}
+  {% endfor %}
@@ -128,6 +131,9 @@ def parse_project_metadata(project):
         # Release metadata
         "packagetype": project.get("packagetype") or "",
         "python_version": project.get("python_version") or "",
+        "yanked": False,
+        "yanked_reason": "",
+        "metadata_sha256": "",  # TODO
     }
 
 
@@ -158,6 +164,9 @@ def parse_metadata(project, version, distribution):
     package["requires_python"] = distribution.get("requires_python") or package.get(
         "requires_python"
     )  # noqa: E501
+    package["yanked"] = distribution.get("yanked") or False
+    package["yanked_reason"] = distribution.get("yanked_reason") or ""
+    package["metadata_sha256"] = distribution.get("data-dist-info-metadata", {}).get("sha256") or ""
 
     return package
 
@@ -395,6 +404,7 @@ def find_artifact():
         "upload_time": str(content.pulp_created),
         "upload_time_iso_8601": str(content.pulp_created.isoformat()),
         "url": url,
+        # todo yanked
         "yanked": False,
         "yanked_reason": None,
     }
@@ -414,6 +424,51 @@ def write_simple_detail(project_name, project_packages, streamed=False):
     return detail.stream(**context) if streamed else detail.render(**context)
 
 
+def write_simple_index_json(project_names):
+    """Writes the simple index in JSON format."""
+    return {
+        "meta": {"api-version": SIMPLE_API_VERSION, "_last-serial": PYPI_SERIAL_CONSTANT},
+        "projects": [
+            {"name": name, "_last-serial": PYPI_SERIAL_CONSTANT} for name in project_names
+        ],
+    }
+
+
+def write_simple_detail_json(project_name, project_packages):
+    """Writes the simple detail page in JSON format."""
+    return {
+        "meta": {"api-version": SIMPLE_API_VERSION, "_last-serial": PYPI_SERIAL_CONSTANT},
+        "name": canonicalize_name(project_name),
+        "files": [
+            {
+                # v1.0, PEP 691
+                "filename": package["filename"],
+                "url": package["url"],
+                "hashes": {"sha256": package["sha256"]},
+                "requires-python": package["requires_python"] or None,  # hyphenated per PEP 691
+                # data-dist-info-metadata is deprecated alias for core-metadata
+                "data-dist-info-metadata": (
+                    {"sha256": package["metadata_sha256"]} if package["metadata_sha256"] else False
+                ),
+                "yanked": (
+                    package["yanked_reason"]
+                    if package["yanked"] and package["yanked_reason"]
+                    else package["yanked"]
+                ),
+                # TODO:
+                # size, upload-time (v1.1, PEP 700)
+                # core-metadata (PEP 714)
+                # provenance and digital attestation (v1.3, PEP 740)
+            }
+            for package in project_packages
+        ],
+        # TODO:
+        # versions (v1.1, PEP 700)
+        # alternate-locations (v1.2, PEP 708)
+        # project-status (v1.4, PEP 792 - pypi and docs differ)
+    }
+
+
 class PackageIncludeFilter:
     """A special class to help filter Package's based on a remote's include/exclude"""
 
diff --git a/pulp_python/tests/functional/api/test_full_mirror.py b/pulp_python/tests/functional/api/test_full_mirror.py
index b2e9b404..c4137b5a 100644
--- a/pulp_python/tests/functional/api/test_full_mirror.py
+++ b/pulp_python/tests/functional/api/test_full_mirror.py
@@ -66,7 +66,7 @@ def test_pull_through_filter(python_remote_factory, python_distribution_factory)
 
     r = requests.get(f"{distro.base_url}simple/pulpcore/")
     assert r.status_code == 404
-    assert r.json() == {"detail": "pulpcore does not exist."}
+    assert r.text == "404 Not Found"
 
     r = requests.get(f"{distro.base_url}simple/shelf-reader/")
     assert r.status_code == 200
@@ -86,7 +86,7 @@ def test_pull_through_filter(python_remote_factory, python_distribution_factory)
 
     r = requests.get(f"{distro.base_url}simple/django/")
     assert r.status_code == 404
-    assert r.json() == {"detail": "django does not exist."}
+    assert r.text == "404 Not Found"
 
     r = requests.get(f"{distro.base_url}simple/pulpcore/")
     assert r.status_code == 502
diff --git a/pulp_python/tests/functional/api/test_pypi_simple_json_api.py b/pulp_python/tests/functional/api/test_pypi_simple_json_api.py
new file mode 100644
index 00000000..befa2ae3
--- /dev/null
+++ b/pulp_python/tests/functional/api/test_pypi_simple_json_api.py
@@ -0,0 +1,98 @@
+from urllib.parse import urljoin
+
+import pytest
+import requests
+
+from pulp_python.tests.functional.constants import PYTHON_SM_PROJECT_SPECIFIER
+
+API_VERSION = "1.0"
+PYPI_SERIAL_CONSTANT = 1000000000
+
+PYPI_TEXT_HTML = "text/html"
+PYPI_SIMPLE_V1_HTML = "application/vnd.pypi.simple.v1+html"
+PYPI_SIMPLE_V1_JSON = "application/vnd.pypi.simple.v1+json"
+
+
+@pytest.mark.parallel
+def test_simple_json_index_api(
+    python_remote_factory, python_repo_with_sync, python_distribution_factory
+):
+    remote = python_remote_factory(includes=PYTHON_SM_PROJECT_SPECIFIER)
+    repo = python_repo_with_sync(remote)
+    distro = python_distribution_factory(repository=repo)
+
+    url = urljoin(distro.base_url, "simple/")
+    headers = {"Accept": PYPI_SIMPLE_V1_JSON}
+
+    response = requests.get(url, headers=headers)
+    assert response.headers["Content-Type"] == PYPI_SIMPLE_V1_JSON
+    assert response.headers["X-PyPI-Last-Serial"] == str(PYPI_SERIAL_CONSTANT)
+
+    data = response.json()
+    assert data["meta"] == {"api-version": API_VERSION, "_last-serial": PYPI_SERIAL_CONSTANT}
+    assert data["projects"]
+    for project in data["projects"]:
+        for i in ["_last-serial", "name"]:
+            assert i in project
+
+
+@pytest.mark.parallel
+def test_simple_json_detail_api(
+    python_remote_factory, python_repo_with_sync, python_distribution_factory
+):
+    remote = python_remote_factory(includes=PYTHON_SM_PROJECT_SPECIFIER)
+    repo = python_repo_with_sync(remote)
+    distro = python_distribution_factory(repository=repo)
+
+    url = f'{urljoin(distro.base_url, "simple/")}aiohttp'
+    headers = {"Accept": PYPI_SIMPLE_V1_JSON}
+
+    response = requests.get(url, headers=headers)
+    assert response.headers["Content-Type"] == PYPI_SIMPLE_V1_JSON
+    assert response.headers["X-PyPI-Last-Serial"] == str(PYPI_SERIAL_CONSTANT)
+
+    data = response.json()
+    assert data["meta"] == {"api-version": API_VERSION, "_last-serial": PYPI_SERIAL_CONSTANT}
+    assert data["name"] == "aiohttp"
+    assert data["files"]
+    for file in data["files"]:
+        for i in [
+            "filename",
+            "url",
+            "hashes",
+            "data-dist-info-metadata",
+            "requires-python",
+            "yanked",
+        ]:
+            assert i in file
+
+
+@pytest.mark.parallel
+@pytest.mark.parametrize(
+    "header, result",
+    [
+        (PYPI_TEXT_HTML, PYPI_TEXT_HTML),
+        (PYPI_SIMPLE_V1_HTML, PYPI_SIMPLE_V1_HTML),
+        (PYPI_SIMPLE_V1_JSON, PYPI_SIMPLE_V1_JSON),
+        # Two accepted types: expect the view's renderer order (html, pypi html, pypi json)
+        (f"{PYPI_SIMPLE_V1_JSON}, {PYPI_SIMPLE_V1_HTML}", PYPI_SIMPLE_V1_HTML),
+        # Empty or unsupported Accept headers are expected to fall back to text/html
+        ("", PYPI_TEXT_HTML),
+        ("application/json", PYPI_TEXT_HTML),
+        ("sth/else", PYPI_TEXT_HTML),
+    ],
+)
+def test_simple_api_content_headers(
+    python_remote_factory, python_repo_with_sync, python_distribution_factory, header, result
+):
+    remote = python_remote_factory(includes=PYTHON_SM_PROJECT_SPECIFIER)
+    repo = python_repo_with_sync(remote)
+    distro = python_distribution_factory(repository=repo)
+
+    index_url = urljoin(distro.base_url, "simple/")
+    detail_url = f"{index_url}aiohttp"
+
+    for url in [index_url, detail_url]:
+        response = requests.get(url, headers={"Accept": header})
+        assert response.status_code == 200
+        assert result in response.headers["Content-Type"]  # NOTE(review): substring match is loose