From f6875a2c9c836a5891ee52eda87a1e998a843a93 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 30 Jun 2026 12:10:41 -0700 Subject: [PATCH] from_template: cap eager coordinate allocation on the dask path A fine resolution on a dask backend gets past both size caps and then runs out of memory building the coordinate vectors at construction. The dask backends skip _MAX_CELLS because the grid data is lazy, and the _MAX_CHUNKS guard keys on block count -- the default tiling grows its block edge to keep that count near _DASK_MAX_BLOCKS, so a fine resolution never trips it. But _make_output_coords builds the x/y vectors eagerly with np.linspace, sized width + height, so from_template("conus", resolution=0.001, backend="dask") would allocate ~72 GB of coordinates up front while the grid data stays lazy. Add _MAX_COORD_CELLS (1e9 elements) and check width + height against it in the dask branch. Eager backends are already bounded by _MAX_CELLS, so this only constrains the otherwise-unbounded dask path; conus at 1 m, the finest grid in the tests, is ~9e6 coordinate elements. --- xrspatial/templates.py | 26 ++++++++++++++++++++++++++ xrspatial/tests/test_templates.py | 18 ++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/xrspatial/templates.py b/xrspatial/templates.py index 7509f1011..a1d79dfa2 100644 --- a/xrspatial/templates.py +++ b/xrspatial/templates.py @@ -45,6 +45,20 @@ # moderate grid, large enough that overlap halos stay cheap. _DASK_BLOCK = 2048 +# The grid data is lazy on a dask backend, but the x/y coordinate vectors are +# not: _make_output_coords builds them eagerly with np.linspace on every path, +# one element per column and per row. Their size grows with width + height, so a +# typo-level fine resolution can ask for tens of GB of coordinates even though +# the chunk graph stays small (the default tiling grows its block to keep the +# count under _MAX_CHUNKS, so that guard never trips on resolution alone). 
Cap +# the eager coordinate allocation so the dask cell-cap exemption can't be used to +# blow up the client at construction time. Eager backends are already bounded by +# _MAX_CELLS (width + height <= width * height whenever both dimensions are at least 2, and exceeds it by only 1 for a single-row or single-column grid), so this only +# constrains the otherwise-unbounded dask path. 1e9 elements is ~8 GB at float64 +# and leaves wide headroom over the finest legitimate grids (conus at 1 m is +# ~9e6 coordinate elements). +_MAX_COORD_CELLS = 1_000_000_000 + # Ceiling on the block count for the default tiling. A 2048-cell block would # explode the graph at a typo-level fine resolution, so for very large grids the # block edge grows to keep the count near this many blocks. That keeps the @@ -604,6 +618,18 @@ def from_template(name: str, else: effective_chunks = chunks if is_dask: + # The grid stays lazy, but the x/y coordinate vectors are built eagerly + # (width + height elements). Guard that allocation so the dask cell-cap + # exemption can't be turned into an out-of-memory at construction time. + n_coord_cells = width + height + if n_coord_cells > _MAX_COORD_CELLS: + raise ValueError( + f"{shape_desc} produces a {height} x {width} grid whose x/y " + f"coordinate vectors total {n_coord_cells:,} elements, exceeding " + f"the {_MAX_COORD_CELLS:,}-element limit. The grid data stays " + f"lazy on a dask backend, but the coordinates are built eagerly, " + f"so this would allocate them up front. {coarsen}." 
+ ) n_chunks = _estimate_n_chunks((height, width), effective_chunks) if n_chunks > _MAX_CHUNKS: # Report the request, not the expanded per-block tuple the default diff --git a/xrspatial/tests/test_templates.py b/xrspatial/tests/test_templates.py index af6bce1c8..0b4f66ba3 100644 --- a/xrspatial/tests/test_templates.py +++ b/xrspatial/tests/test_templates.py @@ -433,6 +433,24 @@ def test_explicit_dask_backend_chunk_count_raises(): from_template("conus", resolution=1, backend="dask+numpy", chunks=512) +@dask_array_available +def test_over_fine_dask_coord_alloc_raises(): + # The dask cell-cap exemption keeps the grid data lazy, and the default + # tiling grows its block so the chunk count stays under _MAX_CHUNKS -- but + # the x/y coordinate vectors (width + height elements) are built eagerly, so + # a typo-level fine resolution would allocate tens of GB of coordinates at + # construction. conus @ 1 mm is ~9e9 coordinate elements (~72 GB) but only + # ~2e5 chunks, so it slips past the chunk-count guard. The coordinate guard + # must catch it first. Match its text specifically. + from xrspatial.templates import _MAX_COORD_CELLS + with pytest.raises(ValueError, match="coordinate vectors"): + from_template("conus", resolution=0.001, backend="dask+numpy") + # The promotion path (chunks given on an eager backend) is guarded too. + with pytest.raises(ValueError, match="coordinate vectors"): + from_template("conus", resolution=0.001, chunks=-1) + assert _MAX_COORD_CELLS == 1_000_000_000 + + @dask_array_available def test_auto_chunks_exempt_from_chunk_cap(): import dask.array as da