From 5e713a3a10f6cbfb711b0c5b40510cac2dac1aa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20D=C3=BCsterhus?= Date: Tue, 30 Jun 2026 12:24:55 +0200 Subject: [PATCH] json: Report unterminated JSON strings as syntax errors Fixes php/php-src#22527. --- ext/json/json_scanner.re | 9 ++- ext/json/tests/gh22527.phpt | 31 +++++++ ...son_last_error_msg_error_location_001.phpt | 5 +- ...son_last_error_msg_error_location_002.phpt | 41 +++++----- ...son_last_error_msg_error_location_004.phpt | 13 ++- ...son_last_error_msg_error_location_005.phpt | 41 +++++----- ...son_last_error_msg_error_location_006.phpt | 5 +- ...son_last_error_msg_error_location_007.phpt | 5 +- ...son_last_error_msg_error_location_008.phpt | 81 +++++++++---------- ...son_last_error_msg_error_location_009.phpt | 21 +++-- ...son_last_error_msg_error_location_010.phpt | 17 ++-- 11 files changed, 149 insertions(+), 120 deletions(-) create mode 100644 ext/json/tests/gh22527.phpt diff --git a/ext/json/json_scanner.re b/ext/json/json_scanner.re index e4d25009132a..0c64a6423baf 100644 --- a/ext/json/json_scanner.re +++ b/ext/json/json_scanner.re @@ -262,7 +262,14 @@ std: s->errcode = PHP_JSON_ERROR_UTF8; return PHP_JSON_T_ERROR; } - + EOI { + if (s->limit < s->cursor) { + s->errcode = PHP_JSON_ERROR_SYNTAX; + } else { + s->errcode = PHP_JSON_ERROR_CTRL_CHAR; + } + return PHP_JSON_T_ERROR; + } CTRL { s->errcode = PHP_JSON_ERROR_CTRL_CHAR; return PHP_JSON_T_ERROR; diff --git a/ext/json/tests/gh22527.phpt b/ext/json/tests/gh22527.phpt new file mode 100644 index 000000000000..cb0dd982d50b --- /dev/null +++ b/ext/json/tests/gh22527.phpt @@ -0,0 +1,31 @@ +--TEST-- +GH-22527: Unterminated JSON strings are misleadingly reported as “Control character error” +--FILE-- + +--EXPECT-- +NULL +int(4) +string(30) "Syntax error near location 1:1" +NULL +int(4) +string(30) "Syntax error near location 1:1" +NULL +int(4) +string(30) "Syntax error near location 1:9" +NULL +int(3) +string(71) "Control character error, possibly incorrectly encoded near location 1:1" diff --git a/ext/json/tests/json_last_error_msg_error_location_001.phpt b/ext/json/tests/json_last_error_msg_error_location_001.phpt index e0553f9f7d65..72cb1c42f23f 100644 --- a/ext/json/tests/json_last_error_msg_error_location_001.phpt +++ b/ext/json/tests/json_last_error_msg_error_location_001.phpt @@ -66,8 +66,8 @@ string(30) "Syntax error near location 1:1" Error at position 1:10: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:10" +int(4) +string(31) "Syntax error near location 1:10" Error at position 1:9: bool(false) @@ -118,4 +118,3 @@ Error at position 1:10: bool(false) int(3) string(72) "Control character error, possibly incorrectly encoded near location 1:10" - diff --git a/ext/json/tests/json_last_error_msg_error_location_002.phpt b/ext/json/tests/json_last_error_msg_error_location_002.phpt index df7fc981ccba..31438255b31a 100644 --- a/ext/json/tests/json_last_error_msg_error_location_002.phpt +++ b/ext/json/tests/json_last_error_msg_error_location_002.phpt @@ -53,51 +53,50 @@ Testing error locations with Unicode UTF-8 characters Error after Japanese characters: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:12" +int(4) +string(31) "Syntax error near location 1:12" Error after Russian characters: bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:9" +int(4) +string(30) "Syntax error near location 1:9" Error after Chinese characters: bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:8" +int(4) +string(30) "Syntax error near location 1:8" Error after Arabic characters: bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:9" +int(4) +string(30) "Syntax error near location 1:9" Error after Emoji: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:11" +int(4) +string(31) "Syntax error near location 1:11" Error in mixed ASCII and UTF-8: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:27" +int(4) +string(31) "Syntax error near location 1:27" Error with UTF-8 escaped sequences: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:10" +int(4) +string(31) "Syntax error near location 1:10" Error in object with multiple UTF-8 keys: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:22" +int(4) +string(31) "Syntax error near location 1:22" Error in array with UTF-8 strings: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:18" +int(4) +string(31) "Syntax error near location 1:18" Error in nested object with UTF-8: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:15" - +int(4) +string(31) "Syntax error near location 1:15" diff --git a/ext/json/tests/json_last_error_msg_error_location_004.phpt b/ext/json/tests/json_last_error_msg_error_location_004.phpt index 165449600fb3..fcde3faa38a3 100644 --- a/ext/json/tests/json_last_error_msg_error_location_004.phpt +++ b/ext/json/tests/json_last_error_msg_error_location_004.phpt @@ -53,8 +53,8 @@ Testing error locations in deeply nested structures Error in deeply nested object: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:31" +int(4) +string(31) "Syntax error near location 1:31" Error in deeply nested array: bool(true) @@ -78,16 +78,15 @@ string(31) "Syntax error near location 1:21" Error in complex structure: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:93" +int(4) +string(31) "Syntax error near location 1:93" Error in array of objects: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:68" +int(4) +string(31) "Syntax error near location 1:68" Error in object with array values: bool(false) int(2) string(61) "State mismatch (invalid or malformed JSON) near location 1:82" - diff --git a/ext/json/tests/json_last_error_msg_error_location_005.phpt b/ext/json/tests/json_last_error_msg_error_location_005.phpt index d12ce387e73e..ee1800eb8273 100644 --- a/ext/json/tests/json_last_error_msg_error_location_005.phpt +++ b/ext/json/tests/json_last_error_msg_error_location_005.phpt @@ -53,51 +53,50 @@ Testing error locations with UTF-16 surrogate pairs and escape sequences Error after UTF-16 escaped emoji: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:11" +int(4) +string(31) "Syntax error near location 1:11" Error after multiple UTF-16 pairs: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:10" +int(4) +string(31) "Syntax error near location 1:10" Error with mixed UTF-8 and UTF-16: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:11" +int(4) +string(31) "Syntax error near location 1:11" Error with UTF-16 in key: bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:9" +int(4) +string(30) "Syntax error near location 1:9" Error with multiple UTF-16 keys: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:22" +int(4) +string(31) "Syntax error near location 1:22" Error with BMP characters: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:10" +int(4) +string(31) "Syntax error near location 1:10" Error with supplementary plane: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:11" +int(4) +string(31) "Syntax error near location 1:11" Error in array with UTF-16: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:12" +int(4) +string(31) "Syntax error near location 1:12" Error in nested structure with UTF-16: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:18" +int(4) +string(31) "Syntax error near location 1:18" Error with UTF-16 and control chars: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:10" - +int(4) +string(31) "Syntax error near location 1:10" diff --git a/ext/json/tests/json_last_error_msg_error_location_006.phpt b/ext/json/tests/json_last_error_msg_error_location_006.phpt index e6aab1af8f27..4a6c221c8f92 100644 --- a/ext/json/tests/json_last_error_msg_error_location_006.phpt +++ b/ext/json/tests/json_last_error_msg_error_location_006.phpt @@ -107,8 +107,8 @@ string(33) "Syntax error near location 1:1011" Error with very long key: bool(false) -int(3) -string(73) "Control character error, possibly incorrectly encoded near location 1:506" +int(4) +string(32) "Syntax error near location 1:506" Error after empty object: bool(false) @@ -149,4 +149,3 @@ Error with mixed whitespace: bool(false) int(3) string(71) "Control character error, possibly incorrectly encoded near location 3:2" - diff --git a/ext/json/tests/json_last_error_msg_error_location_007.phpt b/ext/json/tests/json_last_error_msg_error_location_007.phpt index 0e24889bbbbe..168afc4dbfe2 100644 --- a/ext/json/tests/json_last_error_msg_error_location_007.phpt +++ b/ext/json/tests/json_last_error_msg_error_location_007.phpt @@ -118,8 +118,8 @@ string(30) "Syntax error near location 1:9" Unclosed string: bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:9" +int(4) +string(30) "Syntax error near location 1:9" Invalid escape sequence: bool(false) @@ -175,4 +175,3 @@ Missing comma between object properties: bool(false) int(4) string(30) "Syntax error near location 1:9" - diff --git a/ext/json/tests/json_last_error_msg_error_location_008.phpt b/ext/json/tests/json_last_error_msg_error_location_008.phpt index 4d8a1012316b..60dd9513472b 100644 --- a/ext/json/tests/json_last_error_msg_error_location_008.phpt +++ b/ext/json/tests/json_last_error_msg_error_location_008.phpt @@ -82,101 +82,100 @@ Testing error locations with various UTF-8 multi-byte character widths Error with 2-byte UTF-8 (Latin Extended): bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:10" +int(4) +string(31) "Syntax error near location 1:10" Error with 2-byte UTF-8 (Greek): bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:14" +int(4) +string(31) "Syntax error near location 1:14" Error with 2-byte UTF-8 (Cyrillic): bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:12" +int(4) +string(31) "Syntax error near location 1:12" Error with 3-byte UTF-8 (Chinese): bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:8" +int(4) +string(30) "Syntax error near location 1:8" Error with 3-byte UTF-8 (Japanese Hiragana): bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:10" +int(4) +string(31) "Syntax error near location 1:10" Error with 3-byte UTF-8 (Japanese Katakana): bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:10" +int(4) +string(31) "Syntax error near location 1:10" Error with 3-byte UTF-8 (Korean): bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:8" +int(4) +string(30) "Syntax error near location 1:8" Error with 4-byte UTF-8 (Emoji faces): bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:11" +int(4) +string(31) "Syntax error near location 1:11" Error with 4-byte UTF-8 (Emoji objects): bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:13" +int(4) +string(31) "Syntax error near location 1:13" Error with 4-byte UTF-8 (Mathematical symbols): bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:10" +int(4) +string(31) "Syntax error near location 1:10" Error with mixed 1-2-3 byte UTF-8: bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:9" +int(4) +string(30) "Syntax error near location 1:9" Error with mixed 2-3-4 byte UTF-8: bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:9" +int(4) +string(30) "Syntax error near location 1:9" Error with all byte widths: bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:9" +int(4) +string(30) "Syntax error near location 1:9" Error with UTF-8 key at start: bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:7" +int(4) +string(30) "Syntax error near location 1:7" Error with multiple UTF-8 keys: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:35" +int(4) +string(31) "Syntax error near location 1:35" Error in array with mixed UTF-8: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:25" +int(4) +string(31) "Syntax error near location 1:25" Error in nested structure with various UTF-8: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:22" +int(4) +string(31) "Syntax error near location 1:22" Error with combining diacritical marks: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:10" +int(4) +string(31) "Syntax error near location 1:10" Error with Hebrew: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:10" +int(4) +string(31) "Syntax error near location 1:10" Error with Arabic with diacritics: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:11" - +int(4) +string(31) "Syntax error near location 1:11" diff --git a/ext/json/tests/json_last_error_msg_error_location_009.phpt b/ext/json/tests/json_last_error_msg_error_location_009.phpt index 406179693ef6..9d4403838f63 100644 --- a/ext/json/tests/json_last_error_msg_error_location_009.phpt +++ b/ext/json/tests/json_last_error_msg_error_location_009.phpt @@ -75,18 +75,18 @@ string(46) "Maximum stack depth exceeded near location 1:7" Syntax error at deep nesting level: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:31" +int(4) +string(31) "Syntax error near location 1:31" Syntax error in deep array: bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:6" +int(4) +string(30) "Syntax error near location 1:6" Error after valid deep structure: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:48" +int(4) +string(31) "Syntax error near location 1:48" Error in middle of nested structure: bool(false) @@ -95,16 +95,15 @@ string(31) "Syntax error near location 1:29" Error in array with nested objects: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:30" +int(4) +string(31) "Syntax error near location 1:30" Error in deep UTF-8 structure: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:16" +int(4) +string(31) "Syntax error near location 1:16" Valid deep structure within limit: bool(true) int(0) string(8) "No error" - diff --git a/ext/json/tests/json_last_error_msg_error_location_010.phpt b/ext/json/tests/json_last_error_msg_error_location_010.phpt index 108570205838..d187afff86cc 100644 --- a/ext/json/tests/json_last_error_msg_error_location_010.phpt +++ b/ext/json/tests/json_last_error_msg_error_location_010.phpt @@ -119,13 +119,13 @@ string(30) "Syntax error near location 4:2" Error in string with spaces: bool(false) -int(3) -string(71) "Control character error, possibly incorrectly encoded near location 1:9" +int(4) +string(30) "Syntax error near location 1:9" Error with whitespace around colon: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:12" +int(4) +string(31) "Syntax error near location 1:12" Error with whitespace around comma: bool(true) @@ -154,11 +154,10 @@ string(72) "Control character error, possibly incorrectly encoded near location Error in compact JSON: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:22" +int(4) +string(31) "Syntax error near location 1:22" Error with regular spaces: bool(false) -int(3) -string(72) "Control character error, possibly incorrectly encoded near location 1:10" - +int(4) +string(31) "Syntax error near location 1:10"