Skip to content
27 changes: 27 additions & 0 deletions docs/reference/authentication.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,33 @@ Either `token` or `token_env` must be set for `bearer` and `basic-pat` schemes.
}
```

### GitHub Enterprise Server (GHES)

To use a private catalog or extension hosted on a GitHub Enterprise Server
instance, add a `github` entry listing your GHES host(s). The same entry
authenticates both catalog JSON fetches **and** private release-asset
downloads — Specify recognizes the listed hosts as GitHub Enterprise and
resolves release downloads through the GHES REST API (`/api/v3`).

```json
{
"providers": [
{
"hosts": ["ghes.example.com", "raw.ghes.example.com", "codeload.ghes.example.com"],
"provider": "github",
"auth": "bearer",
"token_env": "GH_ENTERPRISE_TOKEN"
}
]
}
```

List the **bare** web host (e.g. `ghes.example.com`) — release-download URLs
live there. If your instance uses subdomain isolation, also list the `raw.`
and `codeload.` subdomains your catalog/extension URLs use. A
`*.ghes.example.com` wildcard matches subdomains but **not** the bare host,
so always include the bare host explicitly.

### Azure DevOps (`azure-devops`)

| Scheme | Header | Use for |
Expand Down
6 changes: 4 additions & 2 deletions src/specify_cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1128,9 +1128,10 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:
raise typer.Exit(1)

from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset
from specify_cli.authentication.http import github_provider_hosts

_wf_url_extra_headers = None
_resolved_wf_url = _resolve_gh_asset(source, _open_url, timeout=30)
_resolved_wf_url = _resolve_gh_asset(source, _open_url, timeout=30, github_hosts=github_provider_hosts())
if _resolved_wf_url:
source = _resolved_wf_url
_wf_url_extra_headers = {"Accept": "application/octet-stream"}
Expand Down Expand Up @@ -1234,10 +1235,11 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:

try:
from specify_cli.authentication.http import open_url as _open_url
from specify_cli.authentication.http import github_provider_hosts
from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset

_wf_cat_extra_headers = None
_resolved_workflow_url = _resolve_gh_asset(workflow_url, _open_url, timeout=30)
_resolved_workflow_url = _resolve_gh_asset(workflow_url, _open_url, timeout=30, github_hosts=github_provider_hosts())
if _resolved_workflow_url:
workflow_url = _resolved_workflow_url
_wf_cat_extra_headers = {"Accept": "application/octet-stream"}
Expand Down
87 changes: 56 additions & 31 deletions src/specify_cli/_github_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import os
import urllib.request
from fnmatch import fnmatch
from typing import Callable, Dict, Optional
from urllib.parse import quote, unquote, urlparse

Expand Down Expand Up @@ -56,55 +57,79 @@ def build_github_request(url: str) -> urllib.request.Request:
return urllib.request.Request(url, headers=headers)


def _host_matches(hostname: str, patterns: tuple[str, ...]) -> bool:
"""Return True when *hostname* matches a pattern (exact or ``*.suffix``)."""
hostname = hostname.lower()
return any(p == hostname or fnmatch(hostname, p) for p in patterns)


def resolve_github_release_asset_api_url(
download_url: str,
open_url_fn: Callable,
timeout: int = 60,
github_hosts: tuple[str, ...] = (),
) -> Optional[str]:
"""Resolve a GitHub browser release URL to its REST API asset URL.

For private or SSO-protected repositories, browser release download
URLs (``https://github.com/<owner>/<repo>/releases/download/<tag>/<asset>``)
redirect to an HTML/SSO page instead of delivering the file. This
helper resolves such a URL to the matching GitHub REST API asset URL
(``https://api.github.com/repos/…/releases/assets/<id>``), which can
then be downloaded with ``Accept: application/octet-stream`` and an
auth token to retrieve the actual file payload.

If *download_url* is already a REST API asset URL, it is returned
as-is. Non-GitHub URLs and GitHub URLs that are not release-download
URLs return ``None``. If the API lookup fails (e.g. network error or
asset not found), ``None`` is returned so callers can fall back to the
original URL.
"""Resolve a GitHub release browser-download URL to its REST API asset URL.

Works for public ``github.com`` and for GitHub Enterprise Server (GHES)
hosts. A host is treated as GHES when it matches one of *github_hosts*
(exact hostname or ``*.suffix``) — supply the hosts the user has trusted
under a ``github`` provider in ``auth.json``. This allowlist is the
security gate: unlisted hosts never receive GHES API treatment, so a
malicious catalog cannot induce an API request to an arbitrary host.

For a public URL the API base is ``https://api.github.com``; for a GHES
host it is ``{scheme}://{host[:port]}/api/v3``. Returns the API asset URL
(downloadable with ``Accept: application/octet-stream`` + a token), the
input unchanged if it is already an API asset URL, or ``None`` when the
URL is not a resolvable GitHub release download or the lookup fails.

Args:
download_url: The URL to resolve.
open_url_fn: A callable compatible with
``specify_cli.authentication.http.open_url`` used to make the
authenticated API request.
``specify_cli.authentication.http.open_url`` used for the
authenticated release-metadata lookup.
timeout: Per-request timeout in seconds.

Returns:
The resolved REST API asset URL, or ``None`` if resolution is not
applicable or fails.
github_hosts: Host patterns to treat as GitHub Enterprise Server.
"""
import json
import urllib.error

parsed = urlparse(download_url)
hostname = (parsed.hostname or "").lower()
parts = [unquote(part) for part in parsed.path.strip("/").split("/")]

# Already a REST API asset URL — use it directly
if (
parsed.hostname == "api.github.com"
and len(parts) >= 6
and parts[:1] == ["repos"]
and parts[3:5] == ["releases", "assets"]
):
is_ghes = (
bool(hostname)
and hostname not in GITHUB_HOSTS
and _host_matches(hostname, github_hosts)
)

def _is_asset_path(segments: list[str]) -> bool:
return (
len(segments) >= 6
and segments[:1] == ["repos"]
and segments[3:5] == ["releases", "assets"]
)

# Already a REST API asset URL — use it directly. Pure passthrough induces
# no new request: the caller fetches this same URL regardless, so it is
# gated on path shape alone rather than the GHES allowlist. The token stays
# independently gated by auth.json in the download helper, and only the
# resolving path below (which issues a tag-lookup request) needs the
# allowlist as its anti-SSRF gate.
if hostname == "api.github.com" and _is_asset_path(parts):
return download_url
if hostname and parts[:2] == ["api", "v3"] and _is_asset_path(parts[2:]):
return download_url

# Only handle github.com browser release download URLs
if parsed.hostname != "github.com":
# Determine the REST API base for browser release-download URLs.
if hostname == "github.com":
api_base = "https://api.github.com"
elif is_ghes:
authority = hostname if parsed.port is None else f"{hostname}:{parsed.port}"
api_base = f"{parsed.scheme}://{authority}/api/v3"
else:
return None

# Expecting /<owner>/<repo>/releases/download/<tag>/<asset>
Expand All @@ -114,7 +139,7 @@ def resolve_github_release_asset_api_url(
owner, repo, tag = parts[0], parts[1], parts[4]
asset_name = "/".join(parts[5:])
encoded_tag = quote(tag, safe="")
release_url = f"https://api.github.com/repos/{owner}/{repo}/releases/tags/{encoded_tag}"
release_url = f"{api_base}/repos/{owner}/{repo}/releases/tags/{encoded_tag}"

try:
with open_url_fn(release_url, timeout=timeout) as response:
Expand Down
14 changes: 14 additions & 0 deletions src/specify_cli/authentication/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,20 @@ def build_request(url: str, extra_headers: dict[str, str] | None = None) -> urll
return urllib.request.Request(url, headers=headers)


def github_provider_hosts() -> tuple[str, ...]:
    """Collect the host patterns of all ``github`` provider entries.

    Walks the entries returned by ``_load_config()`` in order and flattens
    the ``hosts`` of each entry whose ``provider`` is ``"github"``. Callers
    use the result to decide which hosts count as GitHub Enterprise Server
    when resolving release-asset download URLs.

    Returns:
        The host patterns in configuration order, or an empty tuple when no
        ``auth.json`` exists or it contains no ``github`` entries.
    """
    return tuple(
        host
        for entry in _load_config()
        if entry.provider == "github"
        for host in entry.hosts
    )


def open_url(
url: str,
timeout: int = 10,
Expand Down
10 changes: 8 additions & 2 deletions src/specify_cli/extensions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2057,12 +2057,18 @@ def _resolve_github_release_asset_api_url(
) -> Optional[str]:
"""Resolve a GitHub release asset URL to its API asset URL.

Delegates to the shared helper in :mod:`specify_cli._github_http`.
Delegates to the shared helper in :mod:`specify_cli._github_http`,
passing the ``github`` provider hosts from ``auth.json`` so GitHub
Enterprise Server release assets resolve via ``/api/v3``.
"""
from specify_cli._github_http import resolve_github_release_asset_api_url
from specify_cli.authentication.http import github_provider_hosts

return resolve_github_release_asset_api_url(
download_url, self._open_url, timeout=timeout
download_url,
self._open_url,
timeout=timeout,
github_hosts=github_provider_hosts(),
)

def _validate_catalog_payload(self, catalog_data: Any, url: str) -> None:
Expand Down
13 changes: 11 additions & 2 deletions src/specify_cli/presets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1892,10 +1892,19 @@ def _resolve_github_release_asset_api_url(
download_url: str,
timeout: int = 60,
) -> Optional[str]:
"""Resolve a GitHub release asset URL to its REST API asset URL."""
"""Resolve a GitHub release asset URL to its REST API asset URL.

Passes the ``github`` provider hosts from ``auth.json`` so GitHub
Enterprise Server release assets resolve via ``/api/v3``.
"""
from specify_cli._github_http import resolve_github_release_asset_api_url
from specify_cli.authentication.http import github_provider_hosts

return resolve_github_release_asset_api_url(
download_url, self._open_url, timeout=timeout
download_url,
self._open_url,
timeout=timeout,
github_hosts=github_provider_hosts(),
)

def _validate_catalog_payload(self, catalog_data: Any, url: str) -> None:
Expand Down
5 changes: 4 additions & 1 deletion src/specify_cli/presets/_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,13 @@ def _validate_download_redirect(old_url, new_url):
zip_path = Path(tmpdir) / "preset.zip"
try:
from specify_cli.authentication.http import open_url as _open_url
from specify_cli.authentication.http import github_provider_hosts
from specify_cli._github_http import resolve_github_release_asset_api_url

_preset_extra_headers = None
_resolved_from_url = resolve_github_release_asset_api_url(from_url, _open_url)
_resolved_from_url = resolve_github_release_asset_api_url(
from_url, _open_url, github_hosts=github_provider_hosts()
)
if _resolved_from_url:
from_url = _resolved_from_url
_preset_extra_headers = {"Accept": "application/octet-stream"}
Expand Down
42 changes: 42 additions & 0 deletions tests/test_authentication.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,3 +900,45 @@ def test_accept_header_present(self, monkeypatch):
with patch("specify_cli.authentication.http.urllib.request.urlopen", side_effect=side_effect):
_fetch_latest_release_tag()
assert captured["request"].get_header("Accept") == "application/vnd.github+json"


# ---------------------------------------------------------------------------
# github_provider_hosts
# ---------------------------------------------------------------------------


class TestGithubProviderHosts:
    """Checks for github_provider_hosts(), the source of the GHES host allowlist."""

    @staticmethod
    def _github_entry(*hosts):
        """Build a bearer-token ``github`` provider entry covering *hosts*."""
        return AuthConfigEntry(hosts=hosts, provider="github", auth="bearer", token="t")

    @staticmethod
    def _provider_hosts():
        """Import ``github_provider_hosts`` lazily and return its result."""
        from specify_cli.authentication.http import github_provider_hosts

        return github_provider_hosts()

    def _set_config(self, monkeypatch, entries):
        """Point ``_config_override`` on the authentication HTTP module at *entries*."""
        from specify_cli.authentication import http as auth_http

        monkeypatch.setattr(auth_http, "_config_override", entries)

    def test_returns_hosts_from_github_entries(self, monkeypatch):
        entries = [self._github_entry("ghes.example", "raw.ghes.example")]
        self._set_config(monkeypatch, entries)
        assert self._provider_hosts() == ("ghes.example", "raw.ghes.example")

    def test_empty_when_no_config(self, monkeypatch):
        self._set_config(monkeypatch, [])
        assert self._provider_hosts() == ()

    def test_ignores_non_github_providers(self, monkeypatch):
        azure_entry = AuthConfigEntry(
            hosts=("dev.azure.com",),
            provider="azure-devops",
            auth="basic-pat",
            token="t",
        )
        self._set_config(monkeypatch, [azure_entry])
        assert self._provider_hosts() == ()

    def test_unions_multiple_github_entries(self, monkeypatch):
        entries = [
            self._github_entry("ghes.example"),
            self._github_entry("github.com"),
        ]
        self._set_config(monkeypatch, entries)
        # Hosts come back in entry order, one after another.
        assert self._provider_hosts() == ("ghes.example", "github.com")
35 changes: 35 additions & 0 deletions tests/test_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
import tempfile
import shutil
import tomllib
from contextlib import contextmanager
from pathlib import Path
from datetime import datetime, timezone
from unittest.mock import MagicMock

from tests.conftest import strip_ansi
from specify_cli.extensions import (
Expand Down Expand Up @@ -7280,3 +7282,36 @@ def test_add_dev_force_reinstall(self, tmp_path):
)
assert result2.exit_code == 0, strip_ansi(result2.output)
assert "installed" in strip_ansi(result2.output)


def test_extension_wrapper_resolves_ghes_asset_when_host_configured(tmp_path, monkeypatch):
    """End-to-end wiring: a ``github`` host in auth.json enables GHES asset resolution."""
    from specify_cli.authentication import http as auth_http
    from specify_cli.authentication.config import AuthConfigEntry
    from specify_cli.extensions import ExtensionCatalog

    browser_url = "https://ghes.example/o/r/releases/download/v1/ext.zip"
    tag_lookup_url = "https://ghes.example/api/v3/repos/o/r/releases/tags/v1"
    asset_api_url = "https://ghes.example/api/v3/repos/o/r/releases/assets/7"

    # Release metadata the stubbed opener hands back for the tag lookup.
    release_body = json.dumps(
        {"assets": [{"name": "ext.zip", "url": asset_api_url}]}
    ).encode()
    requested_urls = []

    @contextmanager
    def fake_open(url, timeout=None, extra_headers=None):
        # Record each requested URL so the test can assert on the lookup target.
        requested_urls.append(url)
        response = MagicMock()
        response.read.return_value = release_body
        yield response

    ghes_entry = AuthConfigEntry(
        hosts=("ghes.example",),
        provider="github",
        auth="bearer",
        token="t",
    )
    monkeypatch.setattr(auth_http, "_config_override", [ghes_entry])

    catalog = ExtensionCatalog(tmp_path)
    monkeypatch.setattr(catalog, "_open_url", fake_open)

    resolved = catalog._resolve_github_release_asset_api_url(browser_url)

    assert resolved == asset_api_url
    assert requested_urls == [tag_lookup_url]
Loading