From 90ed9c75b42c1431c26d0d3c66c43f6a650b6d71 Mon Sep 17 00:00:00 2001 From: PurHur Date: Fri, 26 Jun 2026 18:13:57 +0200 Subject: [PATCH] Add json_decode flags for duplicate JSON object keys RFC 8259 allows the same name to appear more than once in a JSON object, but it does not say what a parser should do with the values. PHP has always used last-key-wins, which is reasonable as a default, yet there are real payloads where you want every value instead of silently dropping the earlier ones or always deep-merging nested structures. This adds two opt-in flags: - JSON_DUPLICATE_KEY_ARRAY keeps each duplicate value under the same key as a list. - JSON_DUPLICATE_KEY_MERGE recursively merges nested objects/arrays when the same key appears again; scalars still overwrite like today. The flags are mutually exclusive. If neither is passed, behavior is unchanged. The merge/array logic is hooked up through the existing parser method table so the default decode path stays on the original object_update implementation. 
Co-authored-by: Cursor --- NEWS | 2 + UPGRADING | 2 + ext/json/json.c | 5 + ext/json/json.stub.php | 11 + ext/json/json_arginfo.h | 4 +- ext/json/json_parser.y | 303 +++++++++++++++++- ext/json/php_json.h | 4 + .../json_decode_duplicate_key_array.phpt | 32 ++ .../json_decode_duplicate_key_default.phpt | 12 + ...json_decode_duplicate_key_flags_error.phpt | 12 + .../json_decode_duplicate_key_merge.phpt | 27 ++ .../tests/json_decode_legacy_behavior.phpt | 65 ++++ 12 files changed, 477 insertions(+), 2 deletions(-) create mode 100644 ext/json/tests/json_decode_duplicate_key_array.phpt create mode 100644 ext/json/tests/json_decode_duplicate_key_default.phpt create mode 100644 ext/json/tests/json_decode_duplicate_key_flags_error.phpt create mode 100644 ext/json/tests/json_decode_duplicate_key_merge.phpt create mode 100644 ext/json/tests/json_decode_legacy_behavior.phpt diff --git a/NEWS b/NEWS index 9df461452c1c..372148359acb 100644 --- a/NEWS +++ b/NEWS @@ -114,6 +114,8 @@ PHP NEWS - JSON: . Enriched JSON last error / exception message with error location. (Juan Morales) + . Added JSON_DUPLICATE_KEY_MERGE and JSON_DUPLICATE_KEY_ARRAY flags to + json_decode() to control handling of duplicate object keys. (PurHur) - Fibers: . Fixed bug GH-20483 (ASAN stack overflow with fiber.stack_size INI small diff --git a/UPGRADING b/UPGRADING index 15c1aad15db0..71a1fefd90c8 100644 --- a/UPGRADING +++ b/UPGRADING @@ -222,6 +222,8 @@ PHP 8.6 UPGRADE NOTES - JSON: . Added extra info about error location to the JSON error messages returned from json_last_error_msg() and JsonException message. + . Added JSON_DUPLICATE_KEY_MERGE and JSON_DUPLICATE_KEY_ARRAY flags to + json_decode() for duplicate object key handling. - OpenSSL: . 
Added TLS session resumption support for streams with new stream context diff --git a/ext/json/json.c b/ext/json/json.c index ac033c057ac4..cd64bf06d54a 100644 --- a/ext/json/json.c +++ b/ext/json/json.c @@ -322,6 +322,11 @@ PHP_FUNCTION(json_decode) RETURN_THROWS(); } + if ((options & PHP_JSON_DUPLICATE_KEY_MERGE) && (options & PHP_JSON_DUPLICATE_KEY_ARRAY)) { + zend_argument_value_error(4, "cannot combine JSON_DUPLICATE_KEY_MERGE and JSON_DUPLICATE_KEY_ARRAY"); + RETURN_THROWS(); + } + /* For BC reasons, the bool $assoc overrides the long $options bit for PHP_JSON_OBJECT_AS_ARRAY */ if (!assoc_null) { if (assoc) { diff --git a/ext/json/json.stub.php b/ext/json/json.stub.php index a805c3893dd1..8a3b73e46986 100644 --- a/ext/json/json.stub.php +++ b/ext/json/json.stub.php @@ -90,6 +90,17 @@ */ const JSON_THROW_ON_ERROR = UNKNOWN; +/** + * @var int + * @cvalue PHP_JSON_DUPLICATE_KEY_MERGE + */ +const JSON_DUPLICATE_KEY_MERGE = UNKNOWN; +/** + * @var int + * @cvalue PHP_JSON_DUPLICATE_KEY_ARRAY + */ +const JSON_DUPLICATE_KEY_ARRAY = UNKNOWN; + /** * @var int * @cvalue PHP_JSON_ERROR_NONE diff --git a/ext/json/json_arginfo.h b/ext/json/json_arginfo.h index 87ba9cce3afd..98c5640c09e5 100644 --- a/ext/json/json_arginfo.h +++ b/ext/json/json_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit json.stub.php instead. 
- * Stub hash: 0ceb50047401c4b9e878c09cc518eacc274f7fff */ + * Stub hash: 0ea2d3c11567e7e344d425092563204f6ee2a93c */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_json_encode, 0, 1, MAY_BE_STRING|MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, value, IS_MIXED, 0) @@ -68,6 +68,8 @@ static void register_json_symbols(int module_number) REGISTER_LONG_CONSTANT("JSON_INVALID_UTF8_IGNORE", PHP_JSON_INVALID_UTF8_IGNORE, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_INVALID_UTF8_SUBSTITUTE", PHP_JSON_INVALID_UTF8_SUBSTITUTE, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_THROW_ON_ERROR", PHP_JSON_THROW_ON_ERROR, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("JSON_DUPLICATE_KEY_MERGE", PHP_JSON_DUPLICATE_KEY_MERGE, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("JSON_DUPLICATE_KEY_ARRAY", PHP_JSON_DUPLICATE_KEY_ARRAY, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_ERROR_NONE", PHP_JSON_ERROR_NONE, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_ERROR_DEPTH", PHP_JSON_ERROR_DEPTH, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_ERROR_STATE_MISMATCH", PHP_JSON_ERROR_STATE_MISMATCH, CONST_PERSISTENT); diff --git a/ext/json/json_parser.y b/ext/json/json_parser.y index 0d3b90b29e1e..7e3cd7ec00bf 100644 --- a/ext/json/json_parser.y +++ b/ext/json/json_parser.y @@ -68,6 +68,16 @@ static int php_json_yylex(union YYSTYPE *value, YYLTYPE *location, php_json_pars static void php_json_yyerror(YYLTYPE *location, php_json_parser *parser, char const *msg); static int php_json_parser_array_create(php_json_parser *parser, zval *array); static int php_json_parser_object_create(php_json_parser *parser, zval *array); +static void php_json_merge_array_into(zval *dest, zval *src); +static void php_json_merge_object_into(zval *dest, zval *src); +static void php_json_merge_values(zval *dest, zval *src); +static void php_json_build_duplicate_value_array(zval *existing, zval *zvalue, zval *result); +static int php_json_object_update_array_merge(php_json_parser *parser, zval *object, zend_string *key, zval 
*zvalue); +static int php_json_object_update_object_merge(php_json_parser *parser, zval *object, zend_string *key, zval *zvalue); +static int php_json_object_update_array_collect(php_json_parser *parser, zval *object, zend_string *key, zval *zvalue); +static int php_json_object_update_object_collect(php_json_parser *parser, zval *object, zend_string *key, zval *zvalue); +static int php_json_parser_object_update_merge(php_json_parser *parser, zval *object, zend_string *key, zval *zvalue); +static int php_json_parser_object_update_array(php_json_parser *parser, zval *object, zend_string *key, zval *zvalue); } @@ -232,6 +242,215 @@ static int php_json_parser_object_create(php_json_parser *parser, zval *object) return SUCCESS; } +static void php_json_merge_array_into(zval *dest, zval *src) +{ + HashTable *dest_ht = Z_ARRVAL_P(dest); + HashTable *src_ht = Z_ARRVAL_P(src); + zend_string *key; + zval *val; + + ZEND_HASH_FOREACH_STR_KEY_VAL(src_ht, key, val) { + if (key) { + zval *existing = zend_symtable_find(dest_ht, key); + + if (existing + && Z_TYPE_P(existing) == IS_ARRAY && Z_TYPE_P(val) == IS_ARRAY + ) { + if (zend_array_is_list(Z_ARRVAL_P(existing)) && zend_array_is_list(Z_ARRVAL_P(val))) { + zval *elem; + + ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(val), elem) { + zval copy; + + ZVAL_COPY(&copy, elem); + zend_hash_next_index_insert(Z_ARRVAL_P(existing), &copy); + } ZEND_HASH_FOREACH_END(); + } else { + php_json_merge_array_into(existing, val); + } + } else if (existing + && Z_TYPE_P(existing) == IS_OBJECT && Z_TYPE_P(val) == IS_OBJECT + ) { + php_json_merge_object_into(existing, val); + } else { + zval copy; + + ZVAL_COPY(&copy, val); + zend_symtable_update(dest_ht, key, &copy); + } + } else { + zval copy; + + ZVAL_COPY(&copy, val); + zend_hash_next_index_insert(dest_ht, &copy); + } + } ZEND_HASH_FOREACH_END(); +} + +static void php_json_merge_object_into(zval *dest, zval *src) +{ + zend_string *key; + zval *val; + + ZEND_HASH_FOREACH_STR_KEY_VAL(Z_OBJPROP_P(src), key, val) { + if (!key) { +
continue; + } + + zval *existing = zend_read_property_ex(Z_OBJCE_P(dest), Z_OBJ_P(dest), key, 1, NULL); + + if (existing && Z_TYPE_P(existing) != IS_UNDEF) { + if (Z_TYPE_P(existing) == IS_ARRAY && Z_TYPE_P(val) == IS_ARRAY) { + if (zend_array_is_list(Z_ARRVAL_P(existing)) && zend_array_is_list(Z_ARRVAL_P(val))) { + zval *elem; + + ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(val), elem) { + zval copy; + + ZVAL_COPY(&copy, elem); + zend_hash_next_index_insert(Z_ARRVAL_P(existing), &copy); + } ZEND_HASH_FOREACH_END(); + } else { + php_json_merge_array_into(existing, val); + } + continue; + } + + if (Z_TYPE_P(existing) == IS_OBJECT && Z_TYPE_P(val) == IS_OBJECT) { + php_json_merge_object_into(existing, val); + continue; + } + } + + zval copy; + + ZVAL_COPY(&copy, val); + zend_std_write_property(Z_OBJ_P(dest), key, &copy, NULL); + zval_ptr_dtor(&copy); + } ZEND_HASH_FOREACH_END(); +} + +static void php_json_merge_values(zval *dest, zval *src) +{ + if (Z_TYPE_P(dest) == IS_ARRAY && Z_TYPE_P(src) == IS_ARRAY) { + php_json_merge_array_into(dest, src); + } else if (Z_TYPE_P(dest) == IS_OBJECT && Z_TYPE_P(src) == IS_OBJECT) { + php_json_merge_object_into(dest, src); + } +} + +static void php_json_build_duplicate_value_array(zval *existing, zval *zvalue, zval *result) +{ + array_init(result); + zend_hash_next_index_insert(Z_ARRVAL_P(result), existing); + zend_hash_next_index_insert(Z_ARRVAL_P(result), zvalue); +} + +static int php_json_object_update_array_merge(php_json_parser *parser, zval *object, zend_string *key, zval *zvalue) +{ + HashTable *ht = Z_ARRVAL_P(object); + zval *existing = zend_symtable_find(ht, key); + + (void) parser; + + if (!existing) { + zend_symtable_update(ht, key, zvalue); + return SUCCESS; + } + + if ((Z_TYPE_P(existing) == IS_ARRAY && Z_TYPE_P(zvalue) == IS_ARRAY) + || (Z_TYPE_P(existing) == IS_OBJECT && Z_TYPE_P(zvalue) == IS_OBJECT) + ) { + php_json_merge_values(existing, zvalue); + zval_ptr_dtor_nogc(zvalue); + return SUCCESS; + } + + zend_symtable_update(ht, key, zvalue); +
return SUCCESS; +} + +static int php_json_object_update_object_merge(php_json_parser *parser, zval *object, zend_string *key, zval *zvalue) +{ + zval *existing = zend_read_property_ex(Z_OBJCE_P(object), Z_OBJ_P(object), key, 1, NULL); + + (void) parser; + + if (!existing || Z_TYPE_P(existing) == IS_UNDEF) { + zend_std_write_property(Z_OBJ_P(object), key, zvalue, NULL); + Z_TRY_DELREF_P(zvalue); + return SUCCESS; + } + + if ((Z_TYPE_P(existing) == IS_ARRAY && Z_TYPE_P(zvalue) == IS_ARRAY) + || (Z_TYPE_P(existing) == IS_OBJECT && Z_TYPE_P(zvalue) == IS_OBJECT) + ) { + php_json_merge_values(existing, zvalue); + zval_ptr_dtor_nogc(zvalue); + return SUCCESS; + } + + zend_std_write_property(Z_OBJ_P(object), key, zvalue, NULL); + Z_TRY_DELREF_P(zvalue); + return SUCCESS; +} + +static int php_json_object_update_array_collect(php_json_parser *parser, zval *object, zend_string *key, zval *zvalue) +{ + HashTable *ht = Z_ARRVAL_P(object); + zval *existing = zend_symtable_find(ht, key); + + (void) parser; + + if (!existing) { + zend_symtable_update(ht, key, zvalue); + return SUCCESS; + } + + if (Z_TYPE_P(existing) == IS_ARRAY && zend_array_is_list(Z_ARRVAL_P(existing))) { + zend_hash_next_index_insert(Z_ARRVAL_P(existing), zvalue); + return SUCCESS; + } + + zval collected, copy; + + ZVAL_COPY(&copy, existing); + php_json_build_duplicate_value_array(&copy, zvalue, &collected); + zend_symtable_update(ht, key, &collected); + return SUCCESS; +} + +static int php_json_object_update_object_collect(php_json_parser *parser, zval *object, zend_string *key, zval *zvalue) +{ + zval *existing = zend_read_property_ex(Z_OBJCE_P(object), Z_OBJ_P(object), key, 1, NULL); + + (void) parser; + + if (!existing || Z_TYPE_P(existing) == IS_UNDEF) { + zend_std_write_property(Z_OBJ_P(object), key, zvalue, NULL); + Z_TRY_DELREF_P(zvalue); + return SUCCESS; + } + + if (Z_TYPE_P(existing) == IS_ARRAY && zend_array_is_list(Z_ARRVAL_P(existing))) { + zval copy; + + ZVAL_COPY(&copy, zvalue); +
zend_hash_next_index_insert(Z_ARRVAL_P(existing), &copy); + zval_ptr_dtor_nogc(zvalue); + return SUCCESS; + } + + zval collected, copy; + + ZVAL_COPY(&copy, existing); + php_json_build_duplicate_value_array(&copy, zvalue, &collected); + zend_std_write_property(Z_OBJ_P(object), key, &collected, NULL); + zval_ptr_dtor(&collected); + zval_ptr_dtor_nogc(zvalue); + return SUCCESS; +} + static int php_json_parser_object_update(php_json_parser *parser, zval *object, zend_string *key, zval *zvalue) { /* if JSON_OBJECT_AS_ARRAY is set */ @@ -253,6 +472,56 @@ static int php_json_parser_object_update(php_json_parser *parser, zval *object, return SUCCESS; } +static int php_json_parser_object_update_merge(php_json_parser *parser, zval *object, zend_string *key, zval *zvalue) +{ + if (Z_TYPE_P(object) == IS_ARRAY) { + if (FAILURE == php_json_object_update_array_merge(parser, object, key, zvalue)) { + zend_string_release_ex(key, 0); + return FAILURE; + } + } else { + if (ZSTR_LEN(key) > 0 && ZSTR_VAL(key)[0] == '\0') { + parser->scanner.errcode = PHP_JSON_ERROR_INVALID_PROPERTY_NAME; + zend_string_release_ex(key, 0); + zval_ptr_dtor_nogc(zvalue); + zval_ptr_dtor_nogc(object); + return FAILURE; + } + if (FAILURE == php_json_object_update_object_merge(parser, object, key, zvalue)) { + zend_string_release_ex(key, 0); + return FAILURE; + } + } + zend_string_release_ex(key, 0); + + return SUCCESS; +} + +static int php_json_parser_object_update_array(php_json_parser *parser, zval *object, zend_string *key, zval *zvalue) +{ + if (Z_TYPE_P(object) == IS_ARRAY) { + if (FAILURE == php_json_object_update_array_collect(parser, object, key, zvalue)) { + zend_string_release_ex(key, 0); + return FAILURE; + } + } else { + if (ZSTR_LEN(key) > 0 && ZSTR_VAL(key)[0] == '\0') { + parser->scanner.errcode = PHP_JSON_ERROR_INVALID_PROPERTY_NAME; + zend_string_release_ex(key, 0); + zval_ptr_dtor_nogc(zvalue); + zval_ptr_dtor_nogc(object); + return FAILURE; + } + if (FAILURE ==
php_json_object_update_object_collect(parser, object, key, zvalue)) { + zend_string_release_ex(key, 0); + return FAILURE; + } + } + zend_string_release_ex(key, 0); + + return SUCCESS; +} + static int php_json_parser_array_create_validate(php_json_parser *parser, zval *array) { ZVAL_NULL(array); @@ -330,6 +599,30 @@ static const php_json_parser_methods default_parser_methods = NULL, }; +static const php_json_parser_methods merge_parser_methods = +{ + php_json_parser_array_create, + php_json_parser_array_append, + NULL, + NULL, + php_json_parser_object_create, + php_json_parser_object_update_merge, + NULL, + NULL, +}; + +static const php_json_parser_methods array_parser_methods = +{ + php_json_parser_array_create, + php_json_parser_array_append, + NULL, + NULL, + php_json_parser_object_create, + php_json_parser_object_update_array, + NULL, + NULL, +}; + static const php_json_parser_methods validate_parser_methods = { php_json_parser_array_create_validate, @@ -365,6 +658,14 @@ PHP_JSON_API void php_json_parser_init(php_json_parser *parser, int options, int max_depth) { + const php_json_parser_methods *parser_methods = &default_parser_methods; + + if (options & PHP_JSON_DUPLICATE_KEY_MERGE) { + parser_methods = &merge_parser_methods; + } else if (options & PHP_JSON_DUPLICATE_KEY_ARRAY) { + parser_methods = &array_parser_methods; + } + php_json_parser_init_ex( parser, return_value, @@ -372,7 +673,7 @@ PHP_JSON_API void php_json_parser_init(php_json_parser *parser, str_len, options, max_depth, - &default_parser_methods); + parser_methods); } PHP_JSON_API int php_json_parse(php_json_parser *parser) diff --git a/ext/json/php_json.h b/ext/json/php_json.h index f34684e149d8..dc3895319e0f 100644 --- a/ext/json/php_json.h +++ b/ext/json/php_json.h @@ -89,6 +89,10 @@ static inline void php_json_error_details_clear(php_json_error_details *out) { #define PHP_JSON_INVALID_UTF8_SUBSTITUTE (1<<21) #define PHP_JSON_THROW_ON_ERROR (1<<22) +/* json_decode() duplicate object key 
handling (RFC 8259 leaves duplicates undefined) */ +#define PHP_JSON_DUPLICATE_KEY_MERGE (1<<23) +#define PHP_JSON_DUPLICATE_KEY_ARRAY (1<<24) + /* default depth */ #define PHP_JSON_PARSER_DEFAULT_DEPTH 512 diff --git a/ext/json/tests/json_decode_duplicate_key_array.phpt b/ext/json/tests/json_decode_duplicate_key_array.phpt new file mode 100644 index 000000000000..f368657096e5 --- /dev/null +++ b/ext/json/tests/json_decode_duplicate_key_array.phpt @@ -0,0 +1,32 @@ +--TEST-- +json_decode() JSON_DUPLICATE_KEY_ARRAY collects duplicate values into a list +--FILE-- + + array ( + 0 => 1, + 1 => 2, + 2 => 3, + ), +) +array ( + 'b' => + array ( + 0 => + array ( + 'x' => 1, + ), + 1 => + array ( + 'y' => 2, + ), + ), +) diff --git a/ext/json/tests/json_decode_duplicate_key_default.phpt b/ext/json/tests/json_decode_duplicate_key_default.phpt new file mode 100644 index 000000000000..ecbe82569156 --- /dev/null +++ b/ext/json/tests/json_decode_duplicate_key_default.phpt @@ -0,0 +1,12 @@ +--TEST-- +json_decode() duplicate object keys use last-key-wins by default +--FILE-- + 3, + 'b' => 2, +) diff --git a/ext/json/tests/json_decode_duplicate_key_flags_error.phpt b/ext/json/tests/json_decode_duplicate_key_flags_error.phpt new file mode 100644 index 000000000000..92fb04259da9 --- /dev/null +++ b/ext/json/tests/json_decode_duplicate_key_flags_error.phpt @@ -0,0 +1,12 @@ +--TEST-- +json_decode() rejects combining JSON_DUPLICATE_KEY_MERGE and JSON_DUPLICATE_KEY_ARRAY +--FILE-- +getMessage(), "\n"; +} +--EXPECT-- +json_decode(): Argument #4 ($flags) cannot combine JSON_DUPLICATE_KEY_MERGE and JSON_DUPLICATE_KEY_ARRAY diff --git a/ext/json/tests/json_decode_duplicate_key_merge.phpt b/ext/json/tests/json_decode_duplicate_key_merge.phpt new file mode 100644 index 000000000000..f97294df33c7 --- /dev/null +++ b/ext/json/tests/json_decode_duplicate_key_merge.phpt @@ -0,0 +1,27 @@ +--TEST-- +json_decode() JSON_DUPLICATE_KEY_MERGE merges nested objects and overwrites scalars +--FILE-- + 9, + 
'b' => + array ( + 'x' => 1, + 'y' => 2, + ), +) +array ( + 'k' => + array ( + 0 => 1, + 1 => 2, + 2 => 3, + ), +) diff --git a/ext/json/tests/json_decode_legacy_behavior.phpt b/ext/json/tests/json_decode_legacy_behavior.phpt new file mode 100644 index 000000000000..64410729f4e6 --- /dev/null +++ b/ext/json/tests/json_decode_legacy_behavior.phpt @@ -0,0 +1,65 @@ +--TEST-- +json_decode() default flags preserve pre-change duplicate-key and object behavior +--FILE-- + +--EXPECT-- +array ( + 'a' => 3, + 'b' => 2, +) +(object) array( + 'a' => 3, + 'b' => 2, +) +array ( + 'x' => + array ( + 'c' => 3, + ), +) +array ( + 'k' => 42, +) +array ( + 'k' => + array ( + 'inner' => 2, + ), +) +array ( + 'a' => + array ( + 0 => 3, + 1 => 4, + 2 => 5, + ), +) +array ( + 'id' => '10000000000000000000', +)