Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions xrspatial/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,20 @@
# moderate grid, large enough that overlap halos stay cheap.
_DASK_BLOCK = 2048

# The grid data is lazy on a dask backend, but the x/y coordinate vectors are
# not: _make_output_coords builds them eagerly with np.linspace on every path,
# one element per column and per row. Their size grows with width + height, so a
# typo-level fine resolution can ask for tens of GB of coordinates even though
# the chunk graph stays small (the default tiling grows its block to keep the
# count under _MAX_CHUNKS, so that guard never trips on resolution alone). Cap
# the eager coordinate allocation so the dask cell-cap exemption can't be used to
# blow up the client at construction time. Eager backends are already bounded by
# _MAX_CELLS (width + height <= width * height whenever both dimensions are at
# least 2, and a one-cell-wide grid exceeds its cell count by only 1), so this
# only constrains the otherwise-unbounded dask path. 1e9 elements is ~8 GB at
# float64 and leaves wide headroom over the finest legitimate grids (conus at
# 1 m is ~9e6 coordinate elements).
_MAX_COORD_CELLS = 1_000_000_000

# Ceiling on the block count for the default tiling. A 2048-cell block would
# explode the graph at a typo-level fine resolution, so for very large grids the
# block edge grows to keep the count near this many blocks. That keeps the
Expand Down Expand Up @@ -604,6 +618,18 @@ def from_template(name: str,
else:
effective_chunks = chunks
if is_dask:
# The grid stays lazy, but the x/y coordinate vectors are built eagerly
# (width + height elements). Guard that allocation so the dask cell-cap
# exemption can't be turned into an out-of-memory at construction time.
n_coord_cells = width + height
if n_coord_cells > _MAX_COORD_CELLS:
raise ValueError(
f"{shape_desc} produces a {height} x {width} grid whose x/y "
f"coordinate vectors total {n_coord_cells:,} elements, exceeding "
f"the {_MAX_COORD_CELLS:,}-element limit. The grid data stays "
f"lazy on a dask backend, but the coordinates are built eagerly, "
f"so this would allocate them up front. {coarsen}."
)
n_chunks = _estimate_n_chunks((height, width), effective_chunks)
if n_chunks > _MAX_CHUNKS:
# Report the request, not the expanded per-block tuple the default
Expand Down
18 changes: 18 additions & 0 deletions xrspatial/tests/test_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,24 @@ def test_explicit_dask_backend_chunk_count_raises():
from_template("conus", resolution=1, backend="dask+numpy", chunks=512)


@dask_array_available
def test_over_fine_dask_coord_alloc_raises():
    # On a dask backend the grid data is lazy, and the default tiling enlarges
    # its block so the chunk count stays under _MAX_CHUNKS. The x/y coordinate
    # vectors (width + height elements) are still built eagerly, though, so a
    # typo-level fine resolution would allocate tens of GB of coordinates at
    # construction. conus @ 1 mm is ~9e9 coordinate elements (~72 GB) but only
    # ~2e5 chunks, so the chunk-count guard lets it through and the coordinate
    # guard has to reject it first. Matching on that guard's own wording shows
    # which check fired.
    from xrspatial.templates import _MAX_COORD_CELLS

    over_fine_requests = (
        # Explicit dask backend.
        {"backend": "dask+numpy"},
        # The promotion path (chunks given on an eager backend) is guarded too.
        {"chunks": -1},
    )
    for extra_kwargs in over_fine_requests:
        with pytest.raises(ValueError, match="coordinate vectors"):
            from_template("conus", resolution=0.001, **extra_kwargs)

    assert _MAX_COORD_CELLS == 1_000_000_000


@dask_array_available
def test_auto_chunks_exempt_from_chunk_cap():
import dask.array as da
Expand Down
Loading