From 398df5ae281fa4a0f9e9bc93cfed706512f1059a Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 30 Jun 2026 12:11:11 -0700 Subject: [PATCH] Test mahalanobis Inf input, all-NaN, and degenerate shapes (#3583) mahalanobis() had complete backend and NaN coverage but left a few paths untested. Each behaves correctly on a CUDA host, so these are coverage gaps rather than bugs: - Inf/-Inf in any band -> NaN output and excluded from auto stats, verified identical across all four backends - the 'Not enough valid pixels' branch for all-NaN and too-few-valid inputs - 1x1 single-pixel (provided stats work, auto stats raise) and 1xN/Nx1 strips Test-only. 34 tests pass with no skips on a GPU host. --- .claude/sweep-test-coverage-state.csv | 1 + xrspatial/tests/test_mahalanobis.py | 141 ++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) diff --git a/.claude/sweep-test-coverage-state.csv b/.claude/sweep-test-coverage-state.csv index e1288fd8f..a01ec7750 100644 --- a/.claude/sweep-test-coverage-state.csv +++ b/.claude/sweep-test-coverage-state.csv @@ -14,6 +14,7 @@ idw,2026-06-04,2919,HIGH,1;4,"cupy/dask+cupy backends untested (Cat1 HIGH); GPU interpolate,2026-06-12,3290,MEDIUM,2;3;4;5,"Deep-sweep 2026-06-12 on CUDA host. Backend coverage already complete: all 4 backends exercised for idw/kriging/spline incl. cross-backend equivalence and variance paths; no Cat 1 gaps. 
Filed #3290 for MEDIUM gaps, all verified correct-by-probe before filing (test-only fix): idw fill_value zero-weight branch (deterministic via 1e200 distance weight underflow; added numpy+dask+cupy, cupy RAN+PASSED), idw power only tested at default (exact oracle 10/(2^p+1)), spline collinear lstsq fallback, kriging duplicate points + all-equal-z (zero-variance variogram) + exactly-singular K regularisation retry (unit test on _build_kriging_matrix with all-zero variogram), spline/kriging 1x1 template, Inf/-Inf point filtering (only NaN was tested), lat/lon dim-name propagation (parametrized all 3 funcs), idw attrs preservation, 0-column template. Remaining minor untested: _build_kriging_matrix warn-then-NaN branch (needs mocked LinAlgError on retry). LOW documented not fixed: no asv benchmarks, non-uniform cell spacing unasserted. Full file 82 passed 0 skipped locally." interpolate-kriging,2026-06-04,2920;2921,HIGH,1;2;3;4;5,"Single public fn kriging(); all 4 backends already had cross-backend parity tests (numpy/cupy/dask+numpy/dask+cupy) incl. cupy & dask+cupy variance -- ran green on CUDA host. Gaps closed (test-only, #2921): Cat1 dask+numpy return_variance branch (_chunk_var) was untested -> added test_dask_return_variance_matches_numpy (atol=1e-12, var ~1e-14). Cat4 nlags only default(15) tested -> added non-default nlags=5 + invalid paths (nlags=0/-1 ValueError, nlags=2.5 TypeError). Cat2/3 two-point <3-lag-bins UserWarning branch -> test_two_point_warns_few_lag_bins. Cat2 all-NaN kriging input -> test_kriging_all_nan_points (only idw covered before). Cat5 output metadata (coords/dims/attrs/name) untested -> added test_output_metadata. Single-point kriging CRASHES (zero-size array reduction in _experimental_variogram, N=1) -- real source bug filed #2920; added xfail(strict, raises=ValueError) test_single_point documenting expected graceful behavior; source fix left to #2920 (test-only PR). 
LOW/not filed: singular-matrix K_inv-is-None all-NaN branch is defensive and unreachable via public API. GPU-validated." interpolate_spline,2026-06-04,,HIGH,1;3;5,scope=spline-only; cupy+dask_cupy spline backends untested (_tps_cuda_kernel) | n==2 affine branch + metadata untested | added 4 tests to TestSpline all pass on CUDA host | issue-create denied by classifier no GH issue +mahalanobis,2026-06-30,3583,MEDIUM,2;3;4,"Deep-sweep 2026-06-30 test-coverage on a CUDA host. Backend matrix already complete: numpy/cupy/dask+numpy/dask+cupy all tested with cross-backend parity (auto-stats path) plus user-provided-stats and analytical checks (Cat 1 no gap). Cat 5 covered by test_output_metadata + general_output_checks. Found three untested-but-correct paths, all GPU-validated before adding tests (coverage gaps, not bugs): Cat 2 Inf/-Inf input -> NaN output + excluded from stats, 4-backend parity (test_inf_*); Cat 2/Cat 4 'Not enough valid pixels' error branch for all-NaN and too-few-valid (test_error_all_nan_input, test_error_too_few_valid_pixels); Cat 3 1x1 single-pixel with provided stats works / auto-stats raises, and 1xN+Nx1 strips (test_single_pixel_*, test_strip_shapes_match_numpy_dask). 9 tests added, 34 pass with 0 skips on GPU host. #3583/PR pending." mcda,2026-06-10,3149,HIGH,1;2;5,"Pass 1 (2026-06-10, deep-sweep test-coverage): test_mcda.py had 175 tests, all numpy or dask+numpy -- zero cupy/dask+cupy coverage despite explicit cupy branches in standardize._get_xp and combine._sort_descending (Cat 1 HIGH). Filed #3149, added ~70 tests: cross-backend parity for standardize (7 methods) x cupy/dask+numpy/dask+cupy, combine (wlc/wpm/fuzzy and-or-sum-product-gamma/owa) x 3 backends, constrain, boolean_overlay, sensitivity OAT+MC on GPU backends; metadata preservation (attrs/coords/dims/name) for every stage (Cat 5 MEDIUM); wpm all-NaN criterion + Inf propagation through wlc/fuzzy-and (Cat 2 MEDIUM). All RUN on a CUDA host: 233 passed, 11 xfailed. 
Probing surfaced real source bugs already filed by sibling sweeps as #3146 (owa raises on ALL dask backends -- _sort_descending calls nonexistent da.sort; owa cupy mixes numpy order weights into cupy stack; piecewise standardize broken on cupy + dask+cupy and categorical on dask+cupy via np.asarray on cupy chunks; monte_carlo sensitivity reads .values on cupy data) and #3147 (constrain drops attrs when masks applied) -- those paths pinned with strict xfail markers to flip on fix; constrain cupy/dask+cupy xfail(strict=False) on the known cupy 13.6 + xarray xr.where dependency incompat, not an mcda bug. Source untouched (test-only PR). LOW (documented, not fixed): name= output parameter untested across combine functions; empty (0-row) raster untested -- elementwise ops, judged low value. weights.py (ahp/rank) is pure-numpy metadata, backend matrix N/A, already well covered." morphology,2026-06-20,3404,MEDIUM,2;3,"Added Inf/-Inf, all-NaN, Nx1/1xN strip, integer-dtype tests; source already correct, regression guards only; cupy + dask+cupy ran on GPU host" multispectral,2026-06-20,3431,MEDIUM,2;3;4,true_color NaN/alpha + all-equal range_val==0 + nondefault nodata/c/th; evi & savi validation error paths; GPU tests ran (cupy+dask+cupy) diff --git a/xrspatial/tests/test_mahalanobis.py b/xrspatial/tests/test_mahalanobis.py index e76ddddb5..775be4c41 100644 --- a/xrspatial/tests/test_mahalanobis.py +++ b/xrspatial/tests/test_mahalanobis.py @@ -9,6 +9,7 @@ dask_array_available, general_output_checks, ) +from xrspatial.utils import has_dask_array # --- fixtures --- @@ -390,6 +391,146 @@ def test_memory_guard_skipped_for_dask(monkeypatch, band_arrays): assert result.shape == dk_bands[0].shape +# --- Inf / -Inf handling (issue #3583) --- + +def _band_data_with_inf(): + """Same correlated bands as ``_band_data`` but with Inf/-Inf cells.""" + bands = _band_data() + # replace the injected NaNs with finite values so the only non-finite + # cells are the Inf/-Inf ones we add below + 
bands[0][0, 0] = 0.1 + bands[1][3, 2] = -0.2 + bands[2][7, 3] = 0.3 + bands[0][2, 1] = np.inf + bands[1][5, 0] = -np.inf + bands[2][6, 2] = np.inf + return bands + + +def test_inf_propagates_to_nan_numpy(): + """A non-finite (Inf/-Inf) cell in any band -> NaN output, and the Inf + pixel is excluded from the auto-computed statistics.""" + band_arrays = _band_data_with_inf() + bands = [create_test_raster(b, backend='numpy') for b in band_arrays] + result = mahalanobis(bands) + + assert np.isnan(result.values[2, 1]) + assert np.isnan(result.values[5, 0]) + assert np.isnan(result.values[6, 2]) + # everything else is finite and non-negative + finite = ~np.isnan(result.values) + assert np.all(np.isfinite(result.values[finite])) + assert np.all(result.values[finite] >= 0) + + +@dask_array_available +def test_inf_matches_numpy_dask(): + band_arrays = _band_data_with_inf() + np_bands = [create_test_raster(b, backend='numpy') for b in band_arrays] + dk_bands = [create_test_raster(b, backend='dask+numpy', chunks=(4, 2)) + for b in band_arrays] + + np_result = mahalanobis(np_bands) + dk_result = mahalanobis(dk_bands) + np.testing.assert_allclose( + np_result.values, dk_result.values, rtol=1e-10, equal_nan=True + ) + + +@cuda_and_cupy_available +def test_inf_matches_numpy_cupy(): + band_arrays = _band_data_with_inf() + np_bands = [create_test_raster(b, backend='numpy') for b in band_arrays] + cu_bands = [create_test_raster(b, backend='cupy') for b in band_arrays] + + np_result = mahalanobis(np_bands) + cu_result = mahalanobis(cu_bands) + np.testing.assert_allclose( + np_result.values, cu_result.data.get(), rtol=1e-10, equal_nan=True + ) + + +@cuda_and_cupy_available +def test_inf_matches_numpy_dask_cupy(): + band_arrays = _band_data_with_inf() + np_bands = [create_test_raster(b, backend='numpy') for b in band_arrays] + dc_bands = [create_test_raster(b, backend='dask+cupy', chunks=(4, 2)) + for b in band_arrays] + + np_result = mahalanobis(np_bands) + dc_result = 
mahalanobis(dc_bands) + np.testing.assert_allclose( + np_result.values, dc_result.data.compute().get(), + rtol=1e-10, equal_nan=True + ) + + +# --- not-enough-valid-pixels error path (Cat 2 all-NaN / Cat 4 error) --- + +def test_error_all_nan_input(): + """All-NaN bands leave zero valid pixels -> the statistics phase raises.""" + bands = [xr.DataArray(np.full((4, 4), np.nan), dims=['y', 'x']) + for _ in range(2)] + with pytest.raises(ValueError, match="Not enough valid pixels"): + mahalanobis(bands) + + +def test_error_too_few_valid_pixels(): + """Fewer finite pixels than n_bands+1 cannot form a covariance matrix.""" + # 2 bands need >= 3 valid pixels; leave only 2 finite cells. + b1 = np.full((2, 2), np.nan) + b2 = np.full((2, 2), np.nan) + b1[0, 0] = 1.0 + b1[0, 1] = 2.0 + b2[0, 0] = 3.0 + b2[0, 1] = 4.0 + bands = [xr.DataArray(b, dims=['y', 'x']) for b in [b1, b2]] + with pytest.raises(ValueError, match="Not enough valid pixels"): + mahalanobis(bands) + + +# --- degenerate shapes (Cat 3) --- + +def test_single_pixel_with_provided_stats(): + """A 1x1 raster has no spread to auto-fit, but provided stats still + yield the correct distance for the lone pixel.""" + b1 = xr.DataArray(np.array([[3.0]]), dims=['y', 'x']) + b2 = xr.DataArray(np.array([[4.0]]), dims=['y', 'x']) + result = mahalanobis([b1, b2], mean=np.zeros(2), inv_cov=np.eye(2)) + # identity inv_cov -> Euclidean distance from origin = sqrt(9 + 16) = 5 + assert result.shape == (1, 1) + np.testing.assert_allclose(result.values, [[5.0]], rtol=1e-12) + + +def test_single_pixel_auto_stats_raises(): + """Auto-computed stats need n_bands+1 valid pixels; 1x1 cannot supply + them.""" + b1 = xr.DataArray(np.array([[3.0]]), dims=['y', 'x']) + b2 = xr.DataArray(np.array([[4.0]]), dims=['y', 'x']) + with pytest.raises(ValueError, match="Not enough valid pixels"): + mahalanobis([b1, b2]) + + +@pytest.mark.parametrize("shape", [(1, 8), (8, 1)]) +def test_strip_shapes_match_numpy_dask(shape): + """1xN and Nx1 strips 
compute correctly and agree across numpy and dask.""" + rng = np.random.default_rng(13) + band_arrays = [rng.standard_normal(shape) for _ in range(2)] + + np_bands = [create_test_raster(b, backend='numpy') for b in band_arrays] + np_result = mahalanobis(np_bands) + assert np_result.shape == shape + assert np.all(np.isfinite(np_result.values)) + + if has_dask_array(): + dk_bands = [create_test_raster(b, backend='dask+numpy', chunks=shape) + for b in band_arrays] + dk_result = mahalanobis(dk_bands) + np.testing.assert_allclose( + np_result.values, dk_result.values, rtol=1e-10, equal_nan=True + ) + + @cuda_and_cupy_available def test_memory_guard_cupy_rejects_oversize(monkeypatch): """CuPy backend raises MemoryError when projected VRAM > 50% of free."""