Skip to content

Commit 6f41f98

Browse files
committed
Fix grapheme_levenshtein: replace usearch matching with ucol_strcoll comparison
1 parent e291278 commit 6f41f98

File tree

2 files changed

+66
-52
lines changed

2 files changed

+66
-52
lines changed

Diff for: ext/intl/grapheme/grapheme_string.c

+55 -51
Original file line number | Diff line number | Diff line change
@@ -950,7 +950,6 @@ PHP_FUNCTION(grapheme_levenshtein)
950950
RETURN_THROWS();
951951
}
952952

953-
zend_long *p1, *p2, *tmp;
954953
zend_long c0, c1, c2;
955954
zend_long retval;
956955
size_t i2;
@@ -962,8 +961,7 @@ PHP_FUNCTION(grapheme_levenshtein)
962961
int32_t ustring1_len = 0;
963962
int32_t ustring2_len = 0;
964963

965-
UErrorCode ustatus1 = U_ZERO_ERROR;
966-
UErrorCode ustatus2 = U_ZERO_ERROR;
964+
UErrorCode ustatus = U_ZERO_ERROR;
967965

968966
/* When all costs are equal, levenshtein fulfills the requirements of a metric, which means
969967
* that the distance is symmetric. If string1 is shorter than string2 we can save memory (and CPU time)
@@ -977,20 +975,20 @@ PHP_FUNCTION(grapheme_levenshtein)
977975
pstr1 = ZSTR_VAL(string1);
978976
pstr2 = ZSTR_VAL(string2);
979977

980-
intl_convert_utf8_to_utf16(&ustring1, &ustring1_len, pstr1, ZSTR_LEN(string1), &ustatus1);
978+
intl_convert_utf8_to_utf16(&ustring1, &ustring1_len, pstr1, ZSTR_LEN(string1), &ustatus);
981979

982-
if (U_FAILURE(ustatus1)) {
983-
intl_error_set_code(NULL, ustatus1);
980+
if (U_FAILURE(ustatus)) {
981+
intl_error_set_code(NULL, ustatus);
984982

985983
intl_error_set_custom_msg(NULL, "Error converting input string to UTF-16", 0);
986984
efree(ustring1);
987985
RETURN_FALSE;
988986
}
989987

990-
intl_convert_utf8_to_utf16(&ustring2, &ustring2_len, pstr2, ZSTR_LEN(string2), &ustatus2);
988+
intl_convert_utf8_to_utf16(&ustring2, &ustring2_len, pstr2, ZSTR_LEN(string2), &ustatus);
991989

992-
if (U_FAILURE(ustatus2)) {
993-
intl_error_set_code(NULL, ustatus2);
990+
if (U_FAILURE(ustatus)) {
991+
intl_error_set_code(NULL, ustatus);
994992

995993
intl_error_set_custom_msg(NULL, "Error converting input string to UTF-16", 0);
996994
efree(ustring2);
@@ -1017,30 +1015,64 @@ PHP_FUNCTION(grapheme_levenshtein)
10171015

10181016
unsigned char u_break_iterator_buffer1[U_BRK_SAFECLONE_BUFFERSIZE];
10191017
unsigned char u_break_iterator_buffer2[U_BRK_SAFECLONE_BUFFERSIZE];
1020-
bi1 = grapheme_get_break_iterator((void*)u_break_iterator_buffer1, &ustatus1);
1021-
bi2 = grapheme_get_break_iterator((void*)u_break_iterator_buffer2, &ustatus2);
1018+
bi1 = grapheme_get_break_iterator((void*)u_break_iterator_buffer1, &ustatus);
1019+
if (U_FAILURE(ustatus)) {
1020+
intl_error_set_code(NULL, ustatus);
1021+
intl_error_set_custom_msg(NULL, "Error on grapheme_get_break_iterator for argument #1 ($string1)", 0);
1022+
efree(ustring2);
1023+
efree(ustring1);
1024+
ubrk_close(bi1);
1025+
RETURN_FALSE;
1026+
}
10221027

1023-
ubrk_setText(bi1, ustring1, ustring1_len, &ustatus1);
1028+
bi2 = grapheme_get_break_iterator(u_break_iterator_buffer2, &ustatus);
1029+
if (U_FAILURE(ustatus)) {
1030+
intl_error_set_code(NULL, ustatus);
1031+
intl_error_set_custom_msg(NULL, "Error on grapheme_get_break_iterator for argument #2 ($string2)", 0);
1032+
efree(ustring2);
1033+
efree(ustring1);
1034+
ubrk_close(bi2);
1035+
ubrk_close(bi1);
1036+
RETURN_FALSE;
1037+
}
1038+
ubrk_setText(bi1, ustring1, ustring1_len, &ustatus);
10241039

1025-
if (U_FAILURE(ustatus1)) {
1026-
intl_error_set_code(NULL, ustatus1);
1040+
if (U_FAILURE(ustatus)) {
1041+
intl_error_set_code(NULL, ustatus);
10271042

1028-
intl_error_set_custom_msg(NULL, "Error on ubrk_setText on ustring1", 0);
1043+
intl_error_set_custom_msg(NULL, "Error on ubrk_setText for argument #1 ($string1)", 0);
10291044
efree(ustring2);
10301045
efree(ustring1);
1046+
ubrk_close(bi2);
1047+
ubrk_close(bi1);
10311048
RETURN_FALSE;
10321049
}
10331050

1034-
ubrk_setText(bi2, ustring2, ustring2_len, &ustatus2);
1035-
if (U_FAILURE(ustatus2)) {
1036-
intl_error_set_code(NULL, ustatus2);
1051+
ubrk_setText(bi2, ustring2, ustring2_len, &ustatus);
1052+
if (U_FAILURE(ustatus)) {
1053+
intl_error_set_code(NULL, ustatus);
10371054

1038-
intl_error_set_custom_msg(NULL, "Error on ubrk_setText on ustring2", 0);
1055+
intl_error_set_custom_msg(NULL, "Error on ubrk_setText for argument #2 ($string2)", 0);
10391056
efree(ustring2);
10401057
efree(ustring1);
1058+
ubrk_close(bi2);
1059+
ubrk_close(bi1);
10411060
RETURN_FALSE;
10421061
}
1062+
UCollator *collator = ucol_open("", &ustatus);
1063+
if (U_FAILURE(ustatus)) {
1064+
intl_error_set_code(NULL, ustatus);
10431065

1066+
intl_error_set_custom_msg(NULL, "Error on ucol_open", 0);
1067+
efree(ustring2);
1068+
efree(ustring1);
1069+
ubrk_close(bi2);
1070+
ubrk_close(bi1);
1071+
ucol_close(collator);
1072+
RETURN_FALSE;
1073+
}
1074+
1075+
zend_long *p1, *p2, *tmp;
10441076
p1 = safe_emalloc(strlen_2 + 1, sizeof(zend_long), 0);
10451077
p2 = safe_emalloc(strlen_2 + 1, sizeof(zend_long), 0);
10461078

@@ -1052,7 +1084,6 @@ PHP_FUNCTION(grapheme_levenshtein)
10521084
int32_t current2 = 0;
10531085
int32_t pos1 = 0;
10541086
int32_t pos2 = 0;
1055-
int32_t usrch_pos = 0;
10561087

10571088
while (true) {
10581089
current1 = ubrk_current(bi1);
@@ -1067,37 +1098,8 @@ PHP_FUNCTION(grapheme_levenshtein)
10671098
if (pos2 == UBRK_DONE) {
10681099
break;
10691100
}
1070-
UStringSearch *srch = usearch_open(ustring1 + current1, pos1 - current1, ustring2 + current2, pos2 - current2, "", NULL, &ustatus2);
1071-
if (U_FAILURE(ustatus2)) {
1072-
intl_error_set_code(NULL, ustatus2);
1073-
intl_error_set_custom_msg(NULL, "Error usearch_open", 0);
1074-
ubrk_close(bi1);
1075-
ubrk_close(bi2);
1076-
1077-
efree(ustring1);
1078-
efree(ustring2);
1079-
1080-
efree(p1);
1081-
efree(p2);
1082-
RETURN_FALSE;
1083-
}
1084-
usrch_pos = usearch_first(srch, &ustatus2);
1085-
if (U_FAILURE(ustatus2)) {
1086-
intl_error_set_code(NULL, ustatus2);
1087-
intl_error_set_custom_msg(NULL, "Error usearch_first", 0);
1088-
ubrk_close(bi1);
1089-
ubrk_close(bi2);
1090-
1091-
efree(ustring1);
1092-
efree(ustring2);
1093-
1094-
efree(p1);
1095-
efree(p2);
1096-
RETURN_FALSE;
1097-
}
1098-
usearch_close(srch);
1099-
1100-
if (usrch_pos != USEARCH_DONE) {
1101+
if (ucol_strcoll(collator, ustring1 + current1, pos1 - current1, ustring2 + current2, pos2 - current2) == UCOL_EQUAL) {
1102+
c0 = p1[i2];
11011103
c0 = p1[i2];
11021104
} else {
11031105
c0 = p1[i2] + cost_rep;
@@ -1118,6 +1120,8 @@ PHP_FUNCTION(grapheme_levenshtein)
11181120
p2 = tmp;
11191121
}
11201122

1123+
ucol_close(collator);
1124+
11211125
ubrk_close(bi1);
11221126
ubrk_close(bi2);
11231127

Diff for: ext/intl/php_intl_arginfo.h

+11 -1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)