forked from kangjianwei/LearningJDK
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCharSequence.java
296 lines (272 loc) · 11.8 KB
/
CharSequence.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
/*
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.lang;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.PrimitiveIterator;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.IntConsumer;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;
/**
 * Read-only view over an ordered run of {@code char} values.
 *
 * <p>Many different kinds of character containers can be accessed uniformly
 * through this interface. Each {@code char} is either a character of the
 * <i>Basic Multilingual Plane (BMP)</i> or a surrogate; see
 * <a href="Character.html#unicode">Unicode Character Representation</a>.
 *
 * <p>The general contracts of {@link java.lang.Object#equals(java.lang.Object) equals}
 * and {@link java.lang.Object#hashCode() hashCode} are <em>not</em> refined here.
 * Two objects that both implement {@code CharSequence} may come from unrelated
 * classes, and nothing guarantees that either class can test its instances for
 * equality against the other, so the outcome of such a test is in general
 * undefined. For that reason arbitrary {@code CharSequence} instances should
 * not be used as set elements or as map keys.
 *
 * @author Mike McCloskey
 * @spec JSR-51
 * @since 1.4
 */
// Character-sequence interface: basic read access plus conversion to streams.
public interface CharSequence {

    /**
     * Reports how many 16-bit {@code char}s this sequence holds.
     *
     * @return the count of {@code char}s in this sequence
     */
    // Length of the sequence, in chars.
    int length();

    /**
     * Fetches the {@code char} stored at {@code index}. Indexing works as it
     * does for arrays: valid positions run from zero up to
     * {@code length() - 1}.
     *
     * <p>When the addressed {@code char} is a
     * <a href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate</a>,
     * the surrogate itself is returned without interpretation.
     *
     * @param index position of the {@code char} to fetch
     *
     * @return the {@code char} at that position
     *
     * @throws IndexOutOfBoundsException if {@code index} is negative or not
     *                                   less than {@code length()}
     */
    // The char located at the given index.
    char charAt(int index);

    /**
     * Produces a {@code CharSequence} covering the half-open range
     * {@code [start, end)} of this sequence. The result is
     * {@code end - start} {@code char}s long, which means an empty sequence
     * comes back when {@code start == end}.
     *
     * @param start first index included in the result
     * @param end   first index excluded from the result
     *
     * @return the requested subsequence
     *
     * @throws IndexOutOfBoundsException if either bound is negative, if
     *                                   {@code end} exceeds {@code length()},
     *                                   or if {@code start} exceeds {@code end}
     */
    // A subsequence of this sequence.
    CharSequence subSequence(int start, int end);

    /**
     * Renders this sequence as a string holding the same characters in the
     * same order; the string's length equals the length of this sequence.
     *
     * @return a string made of exactly the characters of this sequence
     */
    // The string formed by this sequence's characters.
    String toString();

    /**
     * Orders two {@code CharSequence} instances lexicographically.
     *
     * <p>Treat a sequence <i>cs</i> of length <i>len</i> as the char values
     * <i>cs[0]</i> through <i>cs[len-1]</i>. Let <i>k</i> be the smallest
     * index at which the two sequences hold different char values; the
     * ordering is then the numeric ordering of <i>cs1[k]</i> against
     * <i>cs2[k]</i>. When no such <i>k</i> exists, the shorter sequence sorts
     * first, and sequences of identical length are considered equal.
     *
     * @param cs1 the first {@code CharSequence}
     * @param cs2 the second {@code CharSequence}
     *
     * @return {@code 0} when the sequences are equal; a negative integer when
     *         the first is lexicographically less than the second; a positive
     *         integer when the first is lexicographically greater
     *
     * @since 11
     */
    // Lexicographic comparison of two character sequences.
    @SuppressWarnings("unchecked")
    static int compare(CharSequence cs1, CharSequence cs2) {
        // Null checks run first argument first, then second.
        Objects.requireNonNull(cs1);
        Objects.requireNonNull(cs2);

        // The very same object is trivially equal to itself.
        if (cs1 == cs2) {
            return 0;
        }

        // Identical runtime class that is Comparable: delegate to its own
        // ordering; compareTo receives an instance of the receiver's class.
        final boolean sameRuntimeClass = cs1.getClass() == cs2.getClass();
        if (sameRuntimeClass && cs1 instanceof Comparable) {
            final Comparable<Object> ordered = (Comparable<Object>) cs1;
            return ordered.compareTo(cs2);
        }

        // Otherwise walk the common prefix looking for the first mismatch.
        final int limit = Math.min(cs1.length(), cs2.length());
        int index = 0;
        while (index < limit) {
            final char left = cs1.charAt(index);
            final char right = cs2.charAt(index);
            if (left != right) {
                return left - right;
            }
            index++;
        }

        // One sequence is a prefix of the other (or they match entirely).
        return cs1.length() - cs2.length();
    }

    /**
     * Exposes the {@code char} values of this sequence as a stream of
     * zero-extended {@code int}s. A char that maps to a
     * <a href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate
     * code point</a> is forwarded without interpretation.
     *
     * <p>Binding to this sequence happens only when the terminal stream
     * operation starts (for mutable sequences the stream's spliterator is
     * <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>).
     * Modifying the sequence while that operation runs yields an undefined
     * result.
     *
     * @return an IntStream of char values from this sequence
     *
     * @since 1.8
     */
    // Streams this sequence; every stream element is one char.
    default IntStream chars() {
        // Cursor-based iterator over the enclosing sequence's chars.
        class CharIterator implements PrimitiveIterator.OfInt {
            int position = 0;

            @Override
            public boolean hasNext() {
                return position < length();
            }

            @Override
            public int nextInt() {
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                return charAt(position++);
            }

            @Override
            public void forEachRemaining(IntConsumer block) {
                // Advance only after the consumer has accepted the value.
                while (position < length()) {
                    block.accept(charAt(position));
                    position++;
                }
            }
        }

        final int traits = Spliterator.ORDERED | Spliterator.SIZED | Spliterator.SUBSIZED;
        // The supplier postpones iterator creation and the length() read
        // until the stream actually asks for its spliterator.
        return StreamSupport.intStream(
            () -> Spliterators.spliterator(new CharIterator(), length(), Spliterator.ORDERED),
            traits,
            false
        );
    }

    /**
     * Exposes this sequence as a stream of code point values. Each surrogate
     * pair met in the sequence is merged as if by {@linkplain
     * Character#toCodePoint Character.toCodePoint} and the merged value is
     * emitted. Every other code unit (ordinary BMP characters, unpaired
     * surrogates, undefined code units) is zero-extended to {@code int} and
     * emitted as is.
     *
     * <p>Binding to this sequence happens only when the terminal stream
     * operation starts (for mutable sequences the stream's spliterator is
     * <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>).
     * Modifying the sequence while that operation runs yields an undefined
     * result.
     *
     * @return an IntStream of Unicode code points from this sequence
     *
     * @since 1.8
     */
    // Streams this sequence; every stream element is one Unicode code point.
    default IntStream codePoints() {
        // Cursor-based iterator that merges surrogate pairs on the fly.
        class CodePointIterator implements PrimitiveIterator.OfInt {
            int position = 0;

            @Override
            public boolean hasNext() {
                return position < length();
            }

            @Override
            public int nextInt() {
                final int end = length();
                if (position >= end) {
                    throw new NoSuchElementException();
                }

                final char lead = charAt(position++);
                // Not the start of a pair, or nothing follows: emit as is.
                if (!Character.isHighSurrogate(lead) || position >= end) {
                    return lead;
                }

                final char trail = charAt(position);
                if (!Character.isLowSurrogate(trail)) {
                    // Unpaired high surrogate; the next char stays unread.
                    return lead;
                }

                position++;
                // The result is the combined Unicode code point value.
                return Character.toCodePoint(lead, trail);
            }

            @Override
            public void forEachRemaining(IntConsumer block) {
                final int end = length();
                int scan = position;
                try {
                    while (scan < end) {
                        final char lead = charAt(scan++);
                        int codePoint = lead;
                        if (Character.isHighSurrogate(lead) && scan < end) {
                            final char trail = charAt(scan);
                            if (Character.isLowSurrogate(trail)) {
                                scan++;
                                codePoint = Character.toCodePoint(lead, trail);
                            }
                        }
                        block.accept(codePoint);
                    }
                } finally {
                    // Write the cursor back even when the consumer throws.
                    position = scan;
                }
            }
        }

        return StreamSupport.intStream(
            () -> Spliterators.spliteratorUnknownSize(new CodePointIterator(), Spliterator.ORDERED),
            Spliterator.ORDERED,
            false
        );
    }
}