@@ -950,7 +950,6 @@ PHP_FUNCTION(grapheme_levenshtein)
950
950
RETURN_THROWS ();
951
951
}
952
952
953
- zend_long * p1 , * p2 , * tmp ;
954
953
zend_long c0 , c1 , c2 ;
955
954
zend_long retval ;
956
955
size_t i2 ;
@@ -962,8 +961,7 @@ PHP_FUNCTION(grapheme_levenshtein)
962
961
int32_t ustring1_len = 0 ;
963
962
int32_t ustring2_len = 0 ;
964
963
965
- UErrorCode ustatus1 = U_ZERO_ERROR ;
966
- UErrorCode ustatus2 = U_ZERO_ERROR ;
964
+ UErrorCode ustatus = U_ZERO_ERROR ;
967
965
968
966
/* When all costs are equal, levenshtein fulfills the requirements of a metric, which means
969
967
* that the distance is symmetric. If string1 is shorter than string2 we can save memory (and CPU time)
@@ -977,20 +975,20 @@ PHP_FUNCTION(grapheme_levenshtein)
977
975
pstr1 = ZSTR_VAL (string1 );
978
976
pstr2 = ZSTR_VAL (string2 );
979
977
980
- intl_convert_utf8_to_utf16 (& ustring1 , & ustring1_len , pstr1 , ZSTR_LEN (string1 ), & ustatus1 );
978
+ intl_convert_utf8_to_utf16 (& ustring1 , & ustring1_len , pstr1 , ZSTR_LEN (string1 ), & ustatus );
981
979
982
- if (U_FAILURE (ustatus1 )) {
983
- intl_error_set_code (NULL , ustatus1 );
980
+ if (U_FAILURE (ustatus )) {
981
+ intl_error_set_code (NULL , ustatus );
984
982
985
983
intl_error_set_custom_msg (NULL , "Error converting input string to UTF-16" , 0 );
986
984
efree (ustring1 );
987
985
RETURN_FALSE ;
988
986
}
989
987
990
- intl_convert_utf8_to_utf16 (& ustring2 , & ustring2_len , pstr2 , ZSTR_LEN (string2 ), & ustatus2 );
988
+ intl_convert_utf8_to_utf16 (& ustring2 , & ustring2_len , pstr2 , ZSTR_LEN (string2 ), & ustatus );
991
989
992
- if (U_FAILURE (ustatus2 )) {
993
- intl_error_set_code (NULL , ustatus2 );
990
+ if (U_FAILURE (ustatus )) {
991
+ intl_error_set_code (NULL , ustatus );
994
992
995
993
intl_error_set_custom_msg (NULL , "Error converting input string to UTF-16" , 0 );
996
994
efree (ustring2 );
@@ -1017,30 +1015,64 @@ PHP_FUNCTION(grapheme_levenshtein)
1017
1015
1018
1016
unsigned char u_break_iterator_buffer1 [U_BRK_SAFECLONE_BUFFERSIZE ];
1019
1017
unsigned char u_break_iterator_buffer2 [U_BRK_SAFECLONE_BUFFERSIZE ];
1020
- bi1 = grapheme_get_break_iterator ((void * )u_break_iterator_buffer1 , & ustatus1 );
1021
- bi2 = grapheme_get_break_iterator ((void * )u_break_iterator_buffer2 , & ustatus2 );
1018
+ bi1 = grapheme_get_break_iterator ((void * )u_break_iterator_buffer1 , & ustatus );
1019
+ if (U_FAILURE (ustatus )) {
1020
+ intl_error_set_code (NULL , ustatus );
1021
+ intl_error_set_custom_msg (NULL , "Error on grapheme_get_break_iterator for argument #1 ($string1)" , 0 );
1022
+ efree (ustring2 );
1023
+ efree (ustring1 );
1024
+ ubrk_close (bi1 );
1025
+ RETURN_FALSE ;
1026
+ }
1022
1027
1023
- ubrk_setText (bi1 , ustring1 , ustring1_len , & ustatus1 );
1028
+ bi2 = grapheme_get_break_iterator (u_break_iterator_buffer2 , & ustatus );
1029
+ if (U_FAILURE (ustatus )) {
1030
+ intl_error_set_code (NULL , ustatus );
1031
+ intl_error_set_custom_msg (NULL , "Error on grapheme_get_break_iterator for argument #2 ($string2)" , 0 );
1032
+ efree (ustring2 );
1033
+ efree (ustring1 );
1034
+ ubrk_close (bi2 );
1035
+ ubrk_close (bi1 );
1036
+ RETURN_FALSE ;
1037
+ }
1038
+ ubrk_setText (bi1 , ustring1 , ustring1_len , & ustatus );
1024
1039
1025
- if (U_FAILURE (ustatus1 )) {
1026
- intl_error_set_code (NULL , ustatus1 );
1040
+ if (U_FAILURE (ustatus )) {
1041
+ intl_error_set_code (NULL , ustatus );
1027
1042
1028
- intl_error_set_custom_msg (NULL , "Error on ubrk_setText on ustring1 " , 0 );
1043
+ intl_error_set_custom_msg (NULL , "Error on ubrk_setText for argument #1 ($string1) " , 0 );
1029
1044
efree (ustring2 );
1030
1045
efree (ustring1 );
1046
+ ubrk_close (bi2 );
1047
+ ubrk_close (bi1 );
1031
1048
RETURN_FALSE ;
1032
1049
}
1033
1050
1034
- ubrk_setText (bi2 , ustring2 , ustring2_len , & ustatus2 );
1035
- if (U_FAILURE (ustatus2 )) {
1036
- intl_error_set_code (NULL , ustatus2 );
1051
+ ubrk_setText (bi2 , ustring2 , ustring2_len , & ustatus );
1052
+ if (U_FAILURE (ustatus )) {
1053
+ intl_error_set_code (NULL , ustatus );
1037
1054
1038
- intl_error_set_custom_msg (NULL , "Error on ubrk_setText on ustring2 " , 0 );
1055
+ intl_error_set_custom_msg (NULL , "Error on ubrk_setText for argument #2 ($string2) " , 0 );
1039
1056
efree (ustring2 );
1040
1057
efree (ustring1 );
1058
+ ubrk_close (bi2 );
1059
+ ubrk_close (bi1 );
1041
1060
RETURN_FALSE ;
1042
1061
}
1062
+ UCollator * collator = ucol_open ("" , & ustatus );
1063
+ if (U_FAILURE (ustatus )) {
1064
+ intl_error_set_code (NULL , ustatus );
1043
1065
1066
+ intl_error_set_custom_msg (NULL , "Error on ucol_open" , 0 );
1067
+ efree (ustring2 );
1068
+ efree (ustring1 );
1069
+ ubrk_close (bi2 );
1070
+ ubrk_close (bi1 );
1071
+ ucol_close (collator );
1072
+ RETURN_FALSE ;
1073
+ }
1074
+
1075
+ zend_long * p1 , * p2 , * tmp ;
1044
1076
p1 = safe_emalloc (strlen_2 + 1 , sizeof (zend_long ), 0 );
1045
1077
p2 = safe_emalloc (strlen_2 + 1 , sizeof (zend_long ), 0 );
1046
1078
@@ -1052,7 +1084,6 @@ PHP_FUNCTION(grapheme_levenshtein)
1052
1084
int32_t current2 = 0 ;
1053
1085
int32_t pos1 = 0 ;
1054
1086
int32_t pos2 = 0 ;
1055
- int32_t usrch_pos = 0 ;
1056
1087
1057
1088
while (true) {
1058
1089
current1 = ubrk_current (bi1 );
@@ -1067,37 +1098,8 @@ PHP_FUNCTION(grapheme_levenshtein)
1067
1098
if (pos2 == UBRK_DONE ) {
1068
1099
break ;
1069
1100
}
1070
- UStringSearch * srch = usearch_open (ustring1 + current1 , pos1 - current1 , ustring2 + current2 , pos2 - current2 , "" , NULL , & ustatus2 );
1071
- if (U_FAILURE (ustatus2 )) {
1072
- intl_error_set_code (NULL , ustatus2 );
1073
- intl_error_set_custom_msg (NULL , "Error usearch_open" , 0 );
1074
- ubrk_close (bi1 );
1075
- ubrk_close (bi2 );
1076
-
1077
- efree (ustring1 );
1078
- efree (ustring2 );
1079
-
1080
- efree (p1 );
1081
- efree (p2 );
1082
- RETURN_FALSE ;
1083
- }
1084
- usrch_pos = usearch_first (srch , & ustatus2 );
1085
- if (U_FAILURE (ustatus2 )) {
1086
- intl_error_set_code (NULL , ustatus2 );
1087
- intl_error_set_custom_msg (NULL , "Error usearch_first" , 0 );
1088
- ubrk_close (bi1 );
1089
- ubrk_close (bi2 );
1090
-
1091
- efree (ustring1 );
1092
- efree (ustring2 );
1093
-
1094
- efree (p1 );
1095
- efree (p2 );
1096
- RETURN_FALSE ;
1097
- }
1098
- usearch_close (srch );
1099
-
1100
- if (usrch_pos != USEARCH_DONE ) {
1101
+ if (ucol_strcoll (collator , ustring1 + current1 , pos1 - current1 , ustring2 + current2 , pos2 - current2 ) == UCOL_EQUAL ) {
1102
+ c0 = p1 [i2 ];
1101
1103
c0 = p1 [i2 ];
1102
1104
} else {
1103
1105
c0 = p1 [i2 ] + cost_rep ;
@@ -1118,6 +1120,8 @@ PHP_FUNCTION(grapheme_levenshtein)
1118
1120
p2 = tmp ;
1119
1121
}
1120
1122
1123
+ ucol_close (collator );
1124
+
1121
1125
ubrk_close (bi1 );
1122
1126
ubrk_close (bi2 );
1123
1127
0 commit comments