Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ext/intl/config.m4
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ if test "$PHP_INTL" != "no"; then
INTL_COMMON_FLAGS="$ICU_CFLAGS -Wno-write-strings -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"
PHP_NEW_EXTENSION([intl], m4_normalize([
intl_convert.c
intl_icu_compat.c
intl_error.c
php_intl.c
]),
Expand Down
2 changes: 1 addition & 1 deletion ext/intl/config.w32
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ if (PHP_INTL != "no") {
CHECK_LIB("icuuc.lib", "intl", PHP_INTL) &&
CHECK_HEADER("unicode/utf.h", "CFLAGS_INTL")) {
// always build as shared - zend_strtod.c/ICU type conflict
EXTENSION("intl", "php_intl.c intl_convert.c intl_convertcpp.cpp intl_error.c ", true,
EXTENSION("intl", "php_intl.c intl_convert.c intl_icu_compat.c intl_convertcpp.cpp intl_error.c ", true,
"/I \"" + configure_module_dirname + "\" /DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
ADD_EXTENSION_DEP('intl', 'date');
ADD_SOURCES(configure_module_dirname + "/collator", "\
Expand Down
14 changes: 4 additions & 10 deletions ext/intl/converter/converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include <unicode/ucnv.h>
#include <unicode/ustring.h>

#include "../intl_icu_compat.h"

extern "C" {
#include "converter.h"
#include "php_intl.h"
Expand Down Expand Up @@ -949,18 +951,10 @@ static zend_object *php_converter_clone_object(zend_object *object) {
zend_object *retval = php_converter_object_ctor(object->ce, &objval);
UErrorCode error = U_ZERO_ERROR;

#if U_ICU_VERSION_MAJOR_NUM > 70
objval->src = ucnv_clone(oldobj->src, &error);
#else
objval->src = ucnv_safeClone(oldobj->src, NULL, NULL, &error);
#endif
objval->src = intl_icu_compat_ucnv_clone(oldobj->src, &error);
if (U_SUCCESS(error)) {
error = U_ZERO_ERROR;
#if U_ICU_VERSION_MAJOR_NUM > 70
objval->dest = ucnv_clone(oldobj->dest, &error);
#else
objval->dest = ucnv_safeClone(oldobj->dest, NULL, NULL, &error);
#endif
objval->dest = intl_icu_compat_ucnv_clone(oldobj->dest, &error);
}

if (U_FAILURE(error)) {
Expand Down
18 changes: 6 additions & 12 deletions ext/intl/grapheme/grapheme_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,6 @@ U_CFUNC PHP_FUNCTION(grapheme_substr)
int32_t start = 0;
int iter_val;
UErrorCode status;
unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
UBreakIterator* bi = nullptr;
int sub_str_start_pos, sub_str_end_pos;
int32_t (*iter_func)(UBreakIterator *);
Expand Down Expand Up @@ -407,7 +406,7 @@ U_CFUNC PHP_FUNCTION(grapheme_substr)
RETURN_FALSE;
}

bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &status );
bi = grapheme_get_break_iterator(&status);

if( U_FAILURE(status) ) {
RETURN_FALSE;
Expand Down Expand Up @@ -729,7 +728,6 @@ U_CFUNC PHP_FUNCTION(grapheme_extract)
int32_t start = 0;
zend_long extract_type = GRAPHEME_EXTRACT_TYPE_COUNT;
UErrorCode status;
unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
UBreakIterator* bi = nullptr;
int ret_pos;
zval *next = nullptr; /* return offset of next part of the string */
Expand Down Expand Up @@ -829,7 +827,7 @@ U_CFUNC PHP_FUNCTION(grapheme_extract)

bi = nullptr;
status = U_ZERO_ERROR;
bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status );
bi = grapheme_get_break_iterator(&status);

ubrk_setUText(bi, &ut, &status);
/* if the caller put us in the middle of a grapheme, we can't detect it in all cases since we
Expand All @@ -855,7 +853,6 @@ U_CFUNC PHP_FUNCTION(grapheme_str_split)
zend_string *str;
zend_long split_len = 1;

unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
UErrorCode ustatus = U_ZERO_ERROR;
int32_t pos, current, i, end_len = 0;
UBreakIterator* bi;
Expand Down Expand Up @@ -891,7 +888,7 @@ U_CFUNC PHP_FUNCTION(grapheme_str_split)

bi = nullptr;
ustatus = U_ZERO_ERROR;
bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &ustatus );
bi = grapheme_get_break_iterator(&ustatus);

if( U_FAILURE(ustatus) ) {
RETURN_FALSE;
Expand Down Expand Up @@ -1031,17 +1028,15 @@ U_CFUNC PHP_FUNCTION(grapheme_levenshtein)
goto out_ustring2;
}

unsigned char u_break_iterator_buffer1[U_BRK_SAFECLONE_BUFFERSIZE];
unsigned char u_break_iterator_buffer2[U_BRK_SAFECLONE_BUFFERSIZE];
bi1 = grapheme_get_break_iterator(u_break_iterator_buffer1, &ustatus);
bi1 = grapheme_get_break_iterator(&ustatus);
if (U_FAILURE(ustatus)) {
intl_error_set_code(NULL, ustatus);
intl_error_set_custom_msg(NULL, "Error on grapheme_get_break_iterator for argument #1 ($string1)");
RETVAL_FALSE;
goto out_bi1;
}

bi2 = grapheme_get_break_iterator(u_break_iterator_buffer2, &ustatus);
bi2 = grapheme_get_break_iterator(&ustatus);
if (U_FAILURE(ustatus)) {
intl_error_set_code(NULL, ustatus);
intl_error_set_custom_msg(NULL, "Error on grapheme_get_break_iterator for argument #2 ($string2)");
Expand Down Expand Up @@ -1144,7 +1139,6 @@ U_CFUNC PHP_FUNCTION(grapheme_strrev)
char *pstr, *end, *p;
zend_string *ret;
int32_t pos = 0, current = 0, end_len = 0;
unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];

ZEND_PARSE_PARAMETERS_START(1, 1)
Z_PARAM_STR(string)
Expand All @@ -1168,7 +1162,7 @@ U_CFUNC PHP_FUNCTION(grapheme_strrev)
bi = nullptr;
ustatus = U_ZERO_ERROR;

bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &ustatus );
bi = grapheme_get_break_iterator(&ustatus);
ret = zend_string_alloc(ZSTR_LEN(string), 0);
p = ZSTR_VAL(ret);

Expand Down
18 changes: 6 additions & 12 deletions ext/intl/grapheme/grapheme_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ extern "C" {
#include <unicode/ubrk.h>
#include <unicode/usearch.h>

#include "../intl_icu_compat.h"

ZEND_EXTERN_MODULE_GLOBALS( intl )

/* }}} */
Expand Down Expand Up @@ -105,7 +107,6 @@ U_CFUNC int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char
{
UChar *uhaystack = NULL, *uneedle = NULL;
int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0;
unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
UBreakIterator* bi = NULL;
UErrorCode status;
UStringSearch* src = NULL;
Expand All @@ -125,7 +126,7 @@ U_CFUNC int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char

/* get a pointer to the haystack taking into account the offset */
status = U_ZERO_ERROR;
bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status );
bi = grapheme_get_break_iterator(&status);
STRPOS_CHECK_STATUS(status, "Failed to get iterator");
status = U_ZERO_ERROR;
ubrk_setText(bi, uhaystack, uhaystack_len, &status);
Expand Down Expand Up @@ -235,12 +236,11 @@ U_CFUNC zend_long grapheme_ascii_check(const unsigned char *day, size_t len)
/* {{{ grapheme_split_string: find and optionally return grapheme boundaries */
U_CFUNC int32_t grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len )
{
unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
UErrorCode status = U_ZERO_ERROR;
int ret_len, pos;
UBreakIterator* bi;

bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &status );
bi = grapheme_get_break_iterator(&status);

if( U_FAILURE(status) ) {
return -1;
Expand Down Expand Up @@ -375,7 +375,7 @@ U_CFUNC zend_long grapheme_strrpos_ascii(char *haystack, size_t haystack_len, ch
/* }}} */

/* {{{ grapheme_get_break_iterator: get a clone of the global character break iterator */
U_CFUNC UBreakIterator* grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status )
U_CFUNC UBreakIterator* grapheme_get_break_iterator(UErrorCode *status )
{
UBreakIterator *global_break_iterator = INTL_G( grapheme_iterator );

Expand All @@ -390,12 +390,6 @@ U_CFUNC UBreakIterator* grapheme_get_break_iterator(void *stack_buffer, UErrorCo
INTL_G(grapheme_iterator) = global_break_iterator;
}

#if U_ICU_VERSION_MAJOR_NUM >= 69
return ubrk_clone(global_break_iterator, status);
#else
int32_t buffer_size = U_BRK_SAFECLONE_BUFFERSIZE;

return ubrk_safeClone(global_break_iterator, stack_buffer, &buffer_size, status);
#endif
return intl_icu_compat_ubrk_clone(global_break_iterator, status);
}
/* }}} */
4 changes: 2 additions & 2 deletions ext/intl/grapheme/grapheme_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ extern "C" {
#include "intl_convert.h"

/* grapheme_get_break_iterator: get a break iterator from the global structure */
UBreakIterator* grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status );
UBreakIterator* grapheme_get_break_iterator(UErrorCode *status );

zend_long grapheme_ascii_check(const unsigned char *day, size_t len);
void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char **sub_str, int32_t *sub_str_len);
Expand All @@ -37,7 +37,7 @@ int32_t grapheme_count_graphemes(UBreakIterator *bi, UChar *string, int32_t stri

int32_t grapheme_get_haystack_offset(UBreakIterator* bi, int32_t offset);

UBreakIterator* grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status );
UBreakIterator* grapheme_get_break_iterator(UErrorCode *status );
#ifdef __cplusplus
}
#endif
Expand Down
114 changes: 114 additions & 0 deletions ext/intl/intl_icu_compat.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to the Modified BSD License that is |
| bundled with this package in the file LICENSE, and is available |
| through the World Wide Web at <https://www.php.net/license/>. |
| |
| SPDX-License-Identifier: BSD-3-Clause |
+----------------------------------------------------------------------+
| Authors: Weilin Du <weilindu@php.net> |
+----------------------------------------------------------------------+
*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stddef.h>

#include "intl_icu_compat.h"
#include <unicode/ubrk.h>
#include <unicode/ucnv.h>
#include <unicode/uspoof.h>
#include <unicode/uset.h>

/* Duplicate a converter through whichever ICU entry point this build offers.
 *
 * When INTL_ICU_HAS_UCNV_CLONE is true the call goes to ucnv_clone();
 * otherwise it goes to ucnv_safeClone() with no caller-supplied buffer.
 * The pointer returned by ICU and the code it writes to *status are handed
 * back to the caller unchanged. */
UConverter *intl_icu_compat_ucnv_clone(const UConverter *converter, UErrorCode *status)
{
	UConverter *copy;

#if INTL_ICU_HAS_UCNV_CLONE
	copy = ucnv_clone(converter, status);
#else
	copy = ucnv_safeClone(converter, NULL, NULL, status);
#endif

	return copy;
}

/* Duplicate a break iterator through whichever ICU entry point this build offers.
 *
 * When INTL_ICU_HAS_UBRK_CLONE is true the call goes to ubrk_clone();
 * otherwise it goes to ubrk_safeClone() with no caller-supplied buffer.
 * The pointer returned by ICU and the code it writes to *status are handed
 * back to the caller unchanged. */
UBreakIterator *intl_icu_compat_ubrk_clone(const UBreakIterator *break_iterator, UErrorCode *status)
{
	UBreakIterator *copy;

#if INTL_ICU_HAS_UBRK_CLONE
	copy = ubrk_clone(break_iterator, status);
#else
	copy = ubrk_safeClone(break_iterator, NULL, NULL, status);
#endif

	return copy;
}

/* Apply the extension's default configuration to a spoof checker.
 *
 * checker      - checker to configure.
 * check_result - out: receives the value returned by uspoof_openCheckResult()
 *                when check results are available, NULL otherwise.
 * status       - forwarded to every ICU call that takes an error code. */
void intl_icu_compat_uspoof_init_checker(USpoofChecker *checker, IntlIcuSpoofCheckResult **check_result, UErrorCode *status)
{
#if INTL_ICU_HAS_SPOOFCHECKER_CHECK_RESULT
	IntlIcuSpoofCheckResult *opened;

	/* Since ICU 58 the WSC/MSC checks are gone and TR39 restriction levels
	 * take their place, so stay on the highly restrictive level. */
	uspoof_setRestrictionLevel(checker, USPOOF_HIGHLY_RESTRICTIVE);

	/* The uspoof_check2*() family reports through an extended result object. */
	opened = uspoof_openCheckResult(status);
	*check_result = opened;
#else
	int enabled_checks = uspoof_getChecks(checker, status);

	/* Drop single-script enforcement: it rejects legitimate mixed-script
	 * languages such as Japanese. */
	enabled_checks &= ~USPOOF_SINGLE_SCRIPT;
	uspoof_setChecks(checker, enabled_checks, status);

	*check_result = NULL;
#endif
}

/* Release a check result handed out by intl_icu_compat_uspoof_init_checker().
 *
 * A NULL handle is ignored. On builds without check-result support the
 * function does nothing. */
void intl_icu_compat_uspoof_close_check_result(IntlIcuSpoofCheckResult *check_result)
{
#if INTL_ICU_HAS_SPOOFCHECKER_CHECK_RESULT
	if (check_result == NULL) {
		return;
	}

	uspoof_closeCheckResult(check_result);
#else
	/* Nothing was allocated on this ICU version; only silence the warning. */
	(void) check_result;
#endif
}

/* Run the spoof check on a UTF-8 string.
 *
 * With check-result support the call is uspoof_check2UTF8() and check_result
 * is passed along; without it the call is uspoof_checkUTF8() and check_result
 * is unused. Returns whatever the ICU call returns; *status is written by ICU. */
int32_t intl_icu_compat_uspoof_check_utf8(const USpoofChecker *checker, const char *text, int32_t length, IntlIcuSpoofCheckResult *check_result, UErrorCode *status)
{
	int32_t outcome;

#if INTL_ICU_HAS_SPOOFCHECKER_CHECK_RESULT
	outcome = uspoof_check2UTF8(checker, text, length, check_result, status);
#else
	(void) check_result;
	outcome = uspoof_checkUTF8(checker, text, length, NULL, status);
#endif

	return outcome;
}

/* Compare the checks stored in a check result with an expected value.
 *
 * check_result  - result object to query (unused without check-result support).
 * checks        - value to compare against.
 * result_checks - out: the value from uspoof_getCheckResultChecks(), or a copy
 *                 of checks when check results are unavailable.
 * status        - forwarded to ICU (unused without check-result support).
 *
 * Returns non-zero when the queried value differs from checks; always 0 on
 * builds without check-result support. */
UBool intl_icu_compat_uspoof_check_result_mismatch(IntlIcuSpoofCheckResult *check_result, int32_t checks, int32_t *result_checks, UErrorCode *status)
{
#if INTL_ICU_HAS_SPOOFCHECKER_CHECK_RESULT
	const int32_t reported = uspoof_getCheckResultChecks(check_result, status);

	*result_checks = reported;
	return reported != checks;
#else
	(void) status;
	(void) check_result;

	/* There is no separate result object to disagree with. */
	*result_checks = checks;
	return 0;
#endif
}

/* Tell whether pattern_option is one of the accepted values.
 *
 * Accepted: 0, USET_IGNORE_SPACE alone, or USET_IGNORE_SPACE OR-ed with exactly
 * one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS and, when
 * INTL_ICU_HAS_USET_SIMPLE_CASE_INSENSITIVE is true,
 * USET_SIMPLE_CASE_INSENSITIVE. Returns non-zero for an accepted value. */
UBool intl_icu_compat_uspoof_is_allowed_chars_pattern_option(int64_t pattern_option)
{
	if (pattern_option == 0) {
		return 1;
	}
	if (pattern_option == USET_IGNORE_SPACE) {
		return 1;
	}
#if INTL_ICU_HAS_USET_SIMPLE_CASE_INSENSITIVE
	if (pattern_option == (USET_IGNORE_SPACE | USET_SIMPLE_CASE_INSENSITIVE)) {
		return 1;
	}
#endif
	if (pattern_option == (USET_IGNORE_SPACE | USET_CASE_INSENSITIVE)) {
		return 1;
	}

	return pattern_option == (USET_IGNORE_SPACE | USET_ADD_CASE_MAPPINGS);
}

/* Return the error text that lists the accepted pattern options.
 *
 * The SIMPLE_CASE_INSENSITIVE alternative is mentioned only when
 * INTL_ICU_HAS_USET_SIMPLE_CASE_INSENSITIVE is true. The returned pointer
 * refers to a string literal and must not be freed or modified. */
const char *intl_icu_compat_uspoof_allowed_chars_pattern_option_error_message(void)
{
	const char *message;

#if INTL_ICU_HAS_USET_SIMPLE_CASE_INSENSITIVE
	message = "must be a valid pattern option, 0 or (SpoofChecker::IGNORE_SPACE|(<none> or SpoofChecker::CASE_INSENSITIVE or SpoofChecker::ADD_CASE_MAPPINGS or SpoofChecker::SIMPLE_CASE_INSENSITIVE))";
#else
	message = "must be a valid pattern option, 0 or (SpoofChecker::IGNORE_SPACE|(<none> or SpoofChecker::CASE_INSENSITIVE or SpoofChecker::ADD_CASE_MAPPINGS))";
#endif

	return message;
}
56 changes: 56 additions & 0 deletions ext/intl/intl_icu_compat.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to the Modified BSD License that is |
| bundled with this package in the file LICENSE, and is available |
| through the World Wide Web at <https://www.php.net/license/>. |
| |
| SPDX-License-Identifier: BSD-3-Clause |
+----------------------------------------------------------------------+
| Authors: Weilin Du <weilindu@php.net> |
+----------------------------------------------------------------------+
*/

#ifndef INTL_ICU_COMPAT_H
#define INTL_ICU_COMPAT_H

#include <unicode/ubrk.h>
#include <unicode/ucnv.h>
#include <unicode/uspoof.h>
#include <unicode/utypes.h>
#include <unicode/uversion.h>

/* Non-zero when U_ICU_VERSION_MAJOR_NUM.U_ICU_VERSION_MINOR_NUM is at least
 * major.minor: on an equal major number the minor number decides, otherwise
 * the major number alone does. */
#define INTL_ICU_VERSION_AT_LEAST(major, minor) \
	((U_ICU_VERSION_MAJOR_NUM == (major)) \
		? (U_ICU_VERSION_MINOR_NUM >= (minor)) \
		: (U_ICU_VERSION_MAJOR_NUM > (major)))

/* Feature switches, each tied to the first ICU release that satisfies it. */

/* ubrk_clone() is used in place of ubrk_safeClone(). */
#define INTL_ICU_HAS_UBRK_CLONE INTL_ICU_VERSION_AT_LEAST(69, 0)

/* ucnv_clone() is used in place of ucnv_safeClone(). */
#define INTL_ICU_HAS_UCNV_CLONE INTL_ICU_VERSION_AT_LEAST(71, 0)

/* USpoofCheckResult and the uspoof_check2*() functions are used. */
#define INTL_ICU_HAS_SPOOFCHECKER_CHECK_RESULT INTL_ICU_VERSION_AT_LEAST(58, 0)

/* USET_SIMPLE_CASE_INSENSITIVE is accepted as a pattern option. */
#define INTL_ICU_HAS_USET_SIMPLE_CASE_INSENSITIVE INTL_ICU_VERSION_AT_LEAST(73, 0)

/* Check-result handle taken by the intl_icu_compat_uspoof_*() helpers.
 * It aliases ICU's USpoofCheckResult when
 * INTL_ICU_HAS_SPOOFCHECKER_CHECK_RESULT is true and is plain void otherwise,
 * so the helper signatures are the same in both configurations. */
#if INTL_ICU_HAS_SPOOFCHECKER_CHECK_RESULT
typedef USpoofCheckResult IntlIcuSpoofCheckResult;
#else
typedef void IntlIcuSpoofCheckResult;
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Wraps ucnv_clone(), or ucnv_safeClone() without a caller buffer when
 * INTL_ICU_HAS_UCNV_CLONE is false. Returns the ICU call's result. */
UConverter *intl_icu_compat_ucnv_clone(const UConverter *converter, UErrorCode *status);
/* Wraps ubrk_clone(), or ubrk_safeClone() without a caller buffer when
 * INTL_ICU_HAS_UBRK_CLONE is false. Returns the ICU call's result. */
UBreakIterator *intl_icu_compat_ubrk_clone(const UBreakIterator *break_iterator, UErrorCode *status);
/* Configures checker and stores in *check_result either the value returned by
 * uspoof_openCheckResult() or, without check-result support, NULL. */
void intl_icu_compat_uspoof_init_checker(USpoofChecker *checker, IntlIcuSpoofCheckResult **check_result, UErrorCode *status);
/* Closes check_result with uspoof_closeCheckResult(); NULL is ignored. Does
 * nothing without check-result support. */
void intl_icu_compat_uspoof_close_check_result(IntlIcuSpoofCheckResult *check_result);
/* Checks a UTF-8 string with uspoof_check2UTF8(), or with uspoof_checkUTF8()
 * (check_result unused) without check-result support. Returns the ICU result. */
int32_t intl_icu_compat_uspoof_check_utf8(const USpoofChecker *checker, const char *text, int32_t length, IntlIcuSpoofCheckResult *check_result, UErrorCode *status);
/* Writes the checks read from check_result to *result_checks and returns
 * non-zero when they differ from checks. Without check-result support it
 * writes checks itself and returns 0. */
UBool intl_icu_compat_uspoof_check_result_mismatch(IntlIcuSpoofCheckResult *check_result, int32_t checks, int32_t *result_checks, UErrorCode *status);
/* Returns non-zero when pattern_option is 0, USET_IGNORE_SPACE, or
 * USET_IGNORE_SPACE combined with one supported case option. */
UBool intl_icu_compat_uspoof_is_allowed_chars_pattern_option(int64_t pattern_option);
/* Returns the error text listing the accepted pattern options; the wording
 * depends on INTL_ICU_HAS_USET_SIMPLE_CASE_INSENSITIVE. */
const char *intl_icu_compat_uspoof_allowed_chars_pattern_option_error_message(void);

#ifdef __cplusplus
}
#endif

#endif
Loading
Loading