hotdata-dev · zfarrell · Jun 28, 2026 · Jun 27, 2026 · Jun 27, 2026 · Jun 27, 2026
diff --git a/.openapi-generator-ignore b/.openapi-generator-ignore
@@ -8,11 +8,13 @@ setup.py
 # never emits or overwrites them, but they are listed here as the source of
 # truth for "hand-maintained, don't touch": _auth.py (JWT exchange), arrow.py
 # (Arrow IPC result fetch), query.py (429 retry + truncation auto-follow, #688),
-# _retry.py (pre-response connection-reset retry on all methods, #118).
+# _retry.py (pre-response connection-reset retry on all methods, #118),
+# uploads.py (transparent presigned direct-to-storage upload flow).
 hotdata/_auth.py
 hotdata/arrow.py
 hotdata/query.py
 hotdata/_retry.py
+hotdata/uploads.py
 
 # Hand-written test for the patched ApiClient.close()/context-manager behavior
 # (re-applied by scripts/patch_api_client_close.py). It lives in the generated

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- `hotdata.UploadsApi` gains `upload_file(source, ...)`, a transparent
+  direct-to-storage upload. Give it a file path, raw `bytes`, or a seekable
+  binary file object and it opens an upload session, sends the data **straight
+  to object storage** (a single request for a small file, concurrent multipart
+  for a large one), and finalizes — returning the `FinalizeUploadResponse`. Your
+  bytes never round-trip through the API. Supports a progress callback, an
+  auto-scaled (or caller-set) part size, bounded concurrency with a peak-memory
+  budget, and idempotent per-part retry (tunable via `part_retry`). Failures
+  raise a typed hierarchy under `UploadError`: `StorageError`,
+  `StorageTransportError`, `MissingETagError`, `MalformedSessionError`, and
+  `SizeLimitError`.
+- `hotdata.UploadsApi.upload_stream` uploads `bytes` or a binary stream
+  (streamed without buffering) in a single request — the fallback for when
+  direct-to-storage uploads aren't available or the source isn't seekable.
+
 ### Changed
 
 - feat(uploads): add file upload endpoints

diff --git a/README.md b/README.md
@@ -89,6 +89,61 @@ with ApiClient(Configuration(api_key="...", workspace_id="...")) as client:
 
 Both methods accept `offset` and `limit` for pagination. They raise `hotdata.arrow.ResultNotReadyError` if the result is still pending or processing — poll `results.get_result(result_id)` until `status == "ready"` first.
 
+## File uploads
+
+`hotdata.uploads.UploadsApi` (also the default `hotdata.UploadsApi`) adds
+`upload_file`, which uploads a local file **directly to object storage** and
+finalizes it in one call. It opens an upload session, `PUT`s the bytes straight
+to storage — a single `PUT` for a small file, concurrent part `PUT`s for a large
+one — then finalizes. The bytes never round-trip through the API.
+
+```python
+from hotdata import ApiClient, Configuration, UploadsApi
+
+with ApiClient(Configuration(api_key="...", workspace_id="...")) as client:
+    uploads = UploadsApi(client)
+
+    finalized = uploads.upload_file(
+        "data.parquet",
+        content_type="application/parquet",
+        progress=lambda done, total: print(f"{done}/{total} bytes"),
+    )
+
+    # Pass finalized.upload_id to the managed-table load endpoint.
+    print(finalized.upload_id)
+```
+
+`upload_file` accepts a path, raw `bytes`, or a seekable binary file object
+(`size` is inferred for all three; a file object is read from its current
+position to the end). The SDK picks single vs. multipart from the size,
+auto-scales the part size, and bounds part concurrency to a peak-memory budget
+(override with `part_size` / `max_concurrency` / `part_retry`). Storage `PUT`s go
+through a dedicated, header-isolated connection pool, so the SDK's auth and
+workspace headers never reach object storage (which would otherwise reject the
+upload). Finalize is sent with retries disabled so the exactly-once call is never
+accidentally replayed.
+
+Failures surface as a typed hierarchy under `hotdata.uploads.UploadError`:
+`StorageError` (storage returned a non-2xx), `StorageTransportError` (the PUT
+failed before any response), `MissingETagError`, `MalformedSessionError`, and
+`SizeLimitError`. A failure while opening the session or finalizing raises the
+usual `hotdata.exceptions.ApiException` — for example a `501`
+`PRESIGN_UNSUPPORTED`, meaning the backend cannot issue upload URLs.
+
+If direct-to-storage upload is unavailable (or the source is not seekable), use
+`upload_stream`, which sends the bytes to the legacy `POST /v1/files` endpoint in
+one request, streaming a file object without buffering it in memory:
+
+```python
+with open("data.parquet", "rb") as f:
+    resp = uploads.upload_stream(f, content_type="application/parquet")
+print(resp.id)
+```
+
+Note that this enhanced `upload_file` shadows the generated raw-body
+`upload_file(body=...)`; the raw operation is still reachable at
+`hotdata.api.uploads_api.UploadsApi.upload_file`.
+
 ## API reference
 
 Generated Markdown for every operation and model is in [`docs/`](https://github.com/hotdata-dev/sdk-python/tree/main/docs):

diff --git a/hotdata/__init__.py b/hotdata/__init__.py
@@ -322,7 +322,9 @@
 # --- hand-applied: prefer the enhanced clients over the generated ones
 # (re-applied by scripts/patch_query_exports.py after regeneration).
 # hotdata.query.QueryApi adds 429 retry + truncation auto-follow;
-# hotdata.arrow.ResultsApi adds Arrow IPC result fetch. The raw generated
-# classes remain importable from hotdata.api.query_api / hotdata.api.results_api.
+# hotdata.arrow.ResultsApi adds Arrow IPC result fetch;
+# hotdata.uploads.UploadsApi adds transparent presigned direct-to-storage
+# uploads. The raw generated classes remain importable from hotdata.api.*.
 from hotdata.query import QueryApi as QueryApi  # noqa: E402,F811
 from hotdata.arrow import ResultsApi as ResultsApi  # noqa: E402,F811
+from hotdata.uploads import UploadsApi as UploadsApi  # noqa: E402,F811