package java.lang;
import java.io.ObjectStreamField;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Formatter;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
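/**
 * The <code>String</code> class represents character strings. All
 * string literals in Java programs, such as <code>"abc"</code>, are
 * implemented as instances of this class.
 * <p>
 * Strings are constant; their values cannot be changed after they
 * are created. String buffers support mutable strings.
 * Because String objects are immutable they can be shared. For example:
 * <p><blockquote><pre>
 *     String str = "abc";
 * </pre></blockquote><p>
 * is equivalent to:
 * <p><blockquote><pre>
 *     char data[] = {'a', 'b', 'c'};
 *     String str = new String(data);
 * </pre></blockquote><p>
 * Here are some more examples of how strings can be used:
 * <p><blockquote><pre>
 *     System.out.println("abc");
 *     String cde = "cde";
 *     System.out.println("abc" + cde);
 *     String c = "abc".substring(2,3);
 *     String d = cde.substring(1, 2);
 * </pre></blockquote>
 * <p>
 * The class <code>String</code> includes methods for examining
 * individual characters of the sequence, for comparing strings, for
 * searching strings, for extracting substrings, and for creating a
 * copy of a string with all characters translated to uppercase or to
 * lowercase.
 * <p>
 * The Java language provides special support for the string
 * concatenation operator (&nbsp;+&nbsp;), and for conversion of
 * other objects to strings. String concatenation is implemented
 * through the <code>StringBuffer</code> class and its
 * <code>append</code> method.
 * <p>
 * String conversions are implemented through the method
 * <code>toString</code>, which is defined by <code>Object</code> and
 * inherited by all classes in Java.
 * <p>
 * For additional information on string concatenation and conversion,
 * see <i>The Java Language Specification</i>.
 * <p>
 * Unless otherwise noted, passing a <tt>null</tt> argument to a
 * constructor or method in this class will cause a
 * {@link NullPointerException} to be thrown.
 * <p>
 * Index values refer to <code>char</code> code units, so a supplementary
 * character uses two positions in a <code>String</code>. In addition to
 * the methods for dealing with Unicode code units, the
 * <code>String</code> class provides methods for dealing with Unicode
 * code points.
 */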
public final class String
implements java.io.Serializable, Comparable<String>, CharSequence {
/** The character array that stores this string's chars. */
private final char value[];
/** Caches the hash code for the string. */
private int hash;
// Defaults to 0
/** Uses the serialVersionUID from JDK 1.0.2. */
private static final long serialVersionUID = -6849794470754667710L;
/**
 * Class String is special-cased within the serialization stream protocol.
 * A String instance is initially written to an ObjectOutputStream in a
 * dedicated format (the description of that format is not reproduced in
 * this comment).
 *
 * The array below is empty, i.e. it lists no fields.
 */
private static final ObjectStreamField[] serialPersistentFields =
new ObjectStreamField[0];
/**
 * Initializes a newly created {@code String} object so that it represents
 * an empty character sequence. Because Strings are immutable, use of this
 * constructor is unnecessary.
 */
public String() {
    // A zero-length backing array represents the empty string.
    this.value = new char[0];
}
/**
 * Initializes a newly created {@code String} object so that it represents
 * the same sequence of characters as the argument; in other words, the
 * newly created string is a copy of the argument string. Unless an
 * explicit copy of {@code original} is needed, use of this constructor is
 * unnecessary.
 *
 * @param  original
 *         A {@code String}
 *
 * @throws NullPointerException
 *         If {@code original} is {@code null}
 */
public String(String original) {
    // The backing array and the cached hash are taken over as-is from the
    // argument rather than being copied.
    this.value = original.value;
    this.hash = original.hash;
}
/**
 * Allocates a new {@code String} so that it represents the sequence of
 * characters currently contained in the character array argument. The
 * contents of the character array are copied; subsequent modification of
 * the character array does not affect the newly created string.
 *
 * @param  value
 *         The initial value of the string
 *
 * @throws NullPointerException
 *         If {@code value} is {@code null}
 */
public String(char value[]) {
    // Defensive copy: the caller's array stays independent of this string.
    this.value = Arrays.copyOf(value, value.length);
}
/**
 * Allocates a new {@code String} that contains characters from a subarray
 * of the character array argument. The {@code offset} argument is the
 * index of the first character of the subarray and the {@code count}
 * argument specifies the length of the subarray. The contents of the
 * subarray are copied; subsequent modification of the character array does
 * not affect the newly created string.
 *
 * @param  value
 *         Array that is the source of characters
 *
 * @param  offset
 *         The initial offset
 *
 * @param  count
 *         The length
 *
 * @throws IndexOutOfBoundsException
 *         If the {@code offset} and {@code count} arguments index
 *         characters outside the bounds of the {@code value} array
 */
public String(char value[], int offset, int count) {
    if (offset < 0) {
        throw new StringIndexOutOfBoundsException(offset);
    }
    if (count < 0) {
        throw new StringIndexOutOfBoundsException(count);
    }
    // Note: offset or count might be near -1>>>1, so the upper bound is
    // tested as (value.length - count) instead of computing offset + count,
    // which could overflow.
    if (offset > value.length - count) {
        throw new StringIndexOutOfBoundsException(offset + count);
    }
    this.value = Arrays.copyOfRange(value, offset, offset + count);
}
/**
 * Allocates a new {@code String} that contains characters from a subarray
 * of the Unicode code point array argument. The {@code offset} argument
 * is the index of the first code point of the subarray and the
 * {@code count} argument specifies the length of the subarray. The
 * contents of the subarray are converted to {@code char}s; subsequent
 * modification of the {@code int} array does not affect the newly created
 * string.
 *
 * @param  codePoints
 *         Array that is the source of Unicode code points
 *
 * @param  offset
 *         The initial offset
 *
 * @param  count
 *         The length of the subarray
 *
 * @throws IllegalArgumentException
 *         If any invalid Unicode code point is found in
 *         {@code codePoints}
 *
 * @throws IndexOutOfBoundsException
 *         If the {@code offset} and {@code count} arguments index
 *         elements outside the bounds of the {@code codePoints} array
 */
public String(int[] codePoints, int offset, int count) {
    if (offset < 0) {
        throw new StringIndexOutOfBoundsException(offset);
    }
    if (count < 0) {
        throw new StringIndexOutOfBoundsException(count);
    }
    // Note: offset or count might be near -1>>>1.
    if (offset > codePoints.length - count) {
        throw new StringIndexOutOfBoundsException(offset + count);
    }

    final int end = offset + count;

    // Pass 1: compute the precise size of the char[]. Every code point
    // needs one char; each valid non-BMP code point needs one more.
    int n = count;
    for (int i = offset; i < end; i++) {
        int c = codePoints[i];
        if (Character.isBmpCodePoint(c)) {
            continue;
        } else if (Character.isValidCodePoint(c)) {
            n++;
        } else {
            throw new IllegalArgumentException(Integer.toString(c));
        }
    }

    // Pass 2: allocate and fill in the char[].
    final char[] v = new char[n];
    for (int i = offset, j = 0; i < end; i++, j++) {
        int c = codePoints[i];
        if (Character.isBmpCodePoint(c)) {
            v[j] = (char) c;
        } else {
            // A surrogate pair occupies two slots, hence the extra j++.
            Character.toSurrogates(c, v, j++);
        }
    }

    this.value = v;
}
/**
 * Allocates a new {@code String} constructed from a subarray of an array
 * of 8-bit integer values.
 *
 * <p> The {@code offset} argument is the index of the first byte of the
 * subarray, and the {@code count} argument specifies the length of the
 * subarray.
 *
 * <p> Each {@code byte} <i>b</i> in the subarray is converted to a
 * {@code char} <i>c</i> such that
 * {@code c == (char)(((hibyte & 0xff) << 8) | (b & 0xff))}.
 *
 * @deprecated This method does not properly convert bytes into characters.
 * As of JDK&nbsp;1.1, the preferred way to do this is via the
 * {@code String} constructors that take a {@link
 * java.nio.charset.Charset}, charset name, or that use the platform's
 * default charset.
 *
 * @param  ascii
 *         The bytes to be converted to characters
 *
 * @param  hibyte
 *         The top 8 bits of each 16-bit Unicode code unit
 *
 * @param  offset
 *         The initial offset
 *
 * @param  count
 *         The length
 *
 * @throws IndexOutOfBoundsException
 *         If the {@code offset} or {@code count} argument is invalid
 *
 * @see  #String(byte[], int)
 * @see  #String(byte[], int, int, java.lang.String)
 * @see  #String(byte[], int, int, java.nio.charset.Charset)
 * @see  #String(byte[], int, int)
 * @see  #String(byte[], java.lang.String)
 * @see  #String(byte[], java.nio.charset.Charset)
 * @see  #String(byte[])
 */
@Deprecated
public String(byte ascii[], int hibyte, int offset, int count) {
    checkBounds(ascii, offset, count);
    char value[] = new char[count];

    if (hibyte == 0) {
        // No high byte to merge: each char is just the unsigned byte value.
        for (int i = count; i-- > 0;) {
            value[i] = (char)(ascii[i + offset] & 0xff);
        }
    } else {
        // Shift once up front; the narrowing cast to char below keeps only
        // the low 16 bits, so only the low 8 bits of hibyte contribute.
        hibyte <<= 8;
        for (int i = count; i-- > 0;) {
            value[i] = (char)(hibyte | (ascii[i + offset] & 0xff));
        }
    }
    this.value = value;
}
/**
* Allocates a new {@code String} containing characters constructed from
* an array of 8-bit integer values. Each character <i>c</i> in the
* resulting string is constructed from the corresponding component
* <i>b</i> in the byte array such that:
*
* <blockquote><pre>
* <b><i>c</i></b> == (char)(((hibyte & 0xff) << 8)
* | (<b><i>b</i></b> & 0xff))
* </pre></blockquote>
*
* @deprecated This method does not properly convert bytes into
* characters. As of JDK 1.1, the preferred way to do this is via the
* {@code String} constructors that take a {@link
* java.nio.charset.Charset}, charset name, or that use the platform's
* default charset.
*
* @param ascii
* The bytes to be converted to characters
*
* @param hibyte
* The top 8 bits of each 16-bit Unicode code unit
*
* @see #String(byte[], int, int, java.lang.String)
* @see #String(byte[], int, int, java.nio.charset.Charset)
* @see #String(byte[], int, int)
* @see #String(byte[], java.lang.String)
* @see #String(byte[], java.nio.charset.Charset)
* @see #String(byte[])
*/
@Deprecated
public String(byte ascii[], int hibyte) {
// Delegates to the four-argument constructor with the range covering the
// whole array; reading ascii.length fails with NullPointerException when
// ascii is null.
this(ascii, hibyte, 0, ascii.length);
}
/* Common private utility method used to bounds check the byte array
* and requested offset & length values used by the String(byte[],..)
* constructors.
*/
private static void checkBounds(byte[] bytes, int offset, int length) {
    // A negative length is reported before a negative offset.
    if (length < 0) {
        throw new StringIndexOutOfBoundsException(length);
    }
    if (offset < 0) {
        throw new StringIndexOutOfBoundsException(offset);
    }
    // The limit is derived by subtraction so that the comparison itself
    // never has to add offset and length together.
    final int highestOffset = bytes.length - length;
    if (offset > highestOffset) {
        throw new StringIndexOutOfBoundsException(offset + length);
    }
}
/**
* Constructs a new {@code String} by decoding the specified subarray of
* bytes using the specified charset. The length of the new {@code String}
* is a function of the charset, and hence may not be equal to the length
* of the subarray.
*
* <p> The behavior of this constructor when the given bytes are not valid
* in the given charset is unspecified. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param offset
* The index of the first byte to decode
*
* @param length
* The number of bytes to decode
* @param charsetName
* The name of a supported {@linkplain java.nio.charset.Charset
* charset}
*
* @throws UnsupportedEncodingException
* If the named charset is not supported
*
* @throws IndexOutOfBoundsException
* If the {@code offset} and {@code length} arguments index
* characters outside the bounds of the {@code bytes} array
*
* @since JDK1.1
*/
public String(byte bytes[], int offset, int length, String charsetName)
        throws UnsupportedEncodingException {
    // A missing charset name is rejected before the range is examined.
    if (null == charsetName) {
        throw new NullPointerException("charsetName");
    }
    checkBounds(bytes, offset, length);
    // Decoding produces the char array that backs this string.
    this.value = StringCoding.decode(charsetName, bytes, offset, length);
}
/**
* Constructs a new {@code String} by decoding the specified subarray of
* bytes using the specified {@linkplain java.nio.charset.Charset charset}.
* The length of the new {@code String} is a function of the charset, and
* hence may not be equal to the length of the subarray.
*
* <p> This method always replaces malformed-input and unmappable-character
* sequences with this charset's default replacement string. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param offset
* The index of the first byte to decode
*
* @param length
* The number of bytes to decode
*
* @param charset
* The {@linkplain java.nio.charset.Charset charset} to be used to
* decode the {@code bytes}
*
* @throws IndexOutOfBoundsException
* If the {@code offset} and {@code length} arguments index
* characters outside the bounds of the {@code bytes} array
*
* @since 1.6
*/
public String(byte bytes[], int offset, int length, Charset charset) {
    // A missing charset is rejected before the range is examined.
    if (null == charset) {
        throw new NullPointerException("charset");
    }
    checkBounds(bytes, offset, length);
    // Decoding produces the char array that backs this string.
    this.value = StringCoding.decode(charset, bytes, offset, length);
}
/**
* Constructs a new {@code String} by decoding the specified array of bytes
* using the specified {@linkplain java.nio.charset.Charset charset}. The
* length of the new {@code String} is a function of the charset, and hence
* may not be equal to the length of the byte array.
*
* <p> The behavior of this constructor when the given bytes are not valid
* in the given charset is unspecified. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param charsetName
* The name of a supported {@linkplain java.nio.charset.Charset
* charset}
*
* @throws UnsupportedEncodingException
* If the named charset is not supported
*
* @since JDK1.1
*/
public String(byte bytes[], String charsetName)
throws UnsupportedEncodingException {
// Decodes the entire array by delegating with the range [0, bytes.length);
// reading bytes.length fails with NullPointerException when bytes is null.
this(bytes, 0, bytes.length, charsetName);
}
/**
* Constructs a new {@code String} by decoding the specified array of
* bytes using the specified {@linkplain java.nio.charset.Charset charset}.
* The length of the new {@code String} is a function of the charset, and
* hence may not be equal to the length of the byte array.
*
* <p> This method always replaces malformed-input and unmappable-character
* sequences with this charset's default replacement string. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param charset
* The {@linkplain java.nio.charset.Charset charset} to be used to
* decode the {@code bytes}
*
* @since 1.6
*/
public String(byte bytes[], Charset charset) {
// Decodes the entire array by delegating with the range [0, bytes.length);
// reading bytes.length fails with NullPointerException when bytes is null.
this(bytes, 0, bytes.length, charset);
}
/**
* Constructs a new {@code String} by decoding the specified subarray of
* bytes using the platform's default charset. The length of the new
* {@code String} is a function of the charset, and hence may not be equal
* to the length of the subarray.
*
* <p> The behavior of this constructor when the given bytes are not valid
* in the default charset is unspecified. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param offset
* The index of the first byte to decode
*
* @param length
* The number of bytes to decode
*
* @throws IndexOutOfBoundsException
* If the {@code offset} and the {@code length} arguments index
* characters outside the bounds of the {@code bytes} array
*
* @since JDK1.1
*/
public String(byte bytes[], int offset, int length) {
// The requested range is validated before any decoding takes place.
checkBounds(bytes, offset, length);
// This decode overload takes no charset argument.
this.value = StringCoding.decode(bytes, offset, length);
}
/**
* Constructs a new {@code String} by decoding the specified array of bytes
* using the platform's default charset. The length of the new {@code
* String} is a function of the charset, and hence may not be equal to the
* length of the byte array.
*
* <p> The behavior of this constructor when the given bytes are not valid
* in the default charset is unspecified. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @since JDK1.1
*/
public String(byte bytes[]) {
// Decodes the entire array by delegating with the range [0, bytes.length);
// reading bytes.length fails with NullPointerException when bytes is null.
this(bytes, 0, bytes.length);
}
/**
* Allocates a new string that contains the sequence of characters
* currently contained in the string buffer argument. The contents of the
* string buffer are copied; subsequent modification of the string buffer
* does not affect the newly created string.
*
* @param buffer
* A {@code StringBuffer}
*/
public String(StringBuffer buffer) {
// The buffer's monitor is held so that its array and its length are read
// together while the copy is made.
synchronized(buffer) {
// Only the first buffer.length() chars of the buffer's array are copied.
this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
}
}
/**
* Allocates a new string that contains the sequence of characters
* currently contained in the string builder argument. The contents of the
* string builder are copied; subsequent modification of the string builder
* does not affect the newly created string.
*
* <p> This constructor is provided to ease migration to {@code
* StringBuilder}. Obtaining a string from a string builder via the {@code
* toString} method is likely to run faster and is generally preferred.
*
* @param builder
* A {@code StringBuilder}
*
* @since 1.5
*/
public String(StringBuilder builder) {
// Only the first builder.length() chars of the builder's array are copied;
// unlike the StringBuffer variant, no lock is taken here.
this.value = Arrays.copyOf(builder.getValue(), builder.length());
}
/*
* Package private constructor which shares value array for speed.
* this constructor is always expected to be called with share==true.
* a separate constructor is needed because we already have a public
* String(char[]) constructor that makes a copy of the given char[].
*/
String(char[] value, boolean share) {
// The share flag is never read; the check below is left disabled.
// assert share : "unshared not supported";
// The given array is stored directly, without copying.
this.value = value;
}
/**
* Package private constructor
*
* @deprecated Use {@link #String(char[],int,int)} instead.
*/
@Deprecated
String(int offset, int count, char[] value) {
// Same arguments in a different order: forwards to String(char[], int, int).
this(value, offset, count);
}
/**
* Returns the length of this string.
* The length is equal to the number of <a href="Character.html#unicode">Unicode
* code units</a> in the string.
*
* @return the length of the sequence of characters represented by this
* object.
*/
public int length() {
// The string's length is the length of its backing char array.
return value.length;
}
/**
* Returns <tt>true</tt> if, and only if, {@link #length()} is <tt>0</tt>.
*
* @return <tt>true</tt> if {@link #length()} is <tt>0</tt>, otherwise
* <tt>false</tt>
*
* @since 1.6
*/
public boolean isEmpty() {
// Empty means the backing char array has no elements.
return value.length == 0;
}
/**
* Returns the <code>char</code> value at the
* specified index. An index ranges from <code>0</code> to
* <code>length() - 1</code>. The first <code>char</code> value of the sequence
* is at index <code>0</code>, the next at index <code>1</code>,
* and so on, as for array indexing.
*
* <p>If the <code>char</code> value specified by the index is a
* <a href="Character.html#unicode">surrogate</a>, the surrogate
* value is returned.
*
* @param index the index of the <code>char</code> value.
* @return the <code>char</code> value at the specified index of this string.
* The first <code>char</code> value is at index <code>0</code>.
* @exception IndexOutOfBoundsException if the <code>index</code>
* argument is negative or not less than the length of this
* string.
*/
public char charAt(int index) {
    // Valid positions are 0 .. value.length - 1; anything else is reported
    // with the offending index.
    final boolean inRange = index >= 0 && index < value.length;
    if (!inRange) {
        throw new StringIndexOutOfBoundsException(index);
    }
    return value[index];
}
/**
* Returns the character (Unicode code point) at the specified
* index. The index refers to <code>char</code> values
* (Unicode code units) and ranges from <code>0</code> to
* {@link #length()}<code> - 1</code>.
*
* <p> If the <code>char</code> value specified at the given index
* is in the high-surrogate range, the following index is less
* than the length of this <code>String</code>, and the
* <code>char</code> value at the following index is in the
* low-surrogate range, then the supplementary code point
* corresponding to this surrogate pair is returned. Otherwise,
* the <code>char</code> value at the given index is returned.
*
* @param index the index to the <code>char</code> values
* @return the code point value of the character at the
* <code>index</code>
* @exception IndexOutOfBoundsException if the <code>index</code>
* argument is negative or not less than the length of this
* string.
* @since 1.5
*/
public int codePointAt(int index) {
    final char[] chars = value;
    final int limit = chars.length;
    // The index must address an existing char.
    final boolean inRange = index >= 0 && index < limit;
    if (!inRange) {
        throw new StringIndexOutOfBoundsException(index);
    }
    // The whole array is passed as the readable range.
    return Character.codePointAtImpl(chars, index, limit);
}
/**
* Returns the character (Unicode code point) before the specified
* index. The index refers to <code>char</code> values
* (Unicode code units) and ranges from <code>1</code> to {@link
* CharSequence#length() length}.
*
* <p> If the <code>char</code> value at <code>(index - 1)</code>
* is in the low-surrogate range, <code>(index - 2)</code> is not
* negative, and the <code>char</code> value at <code>(index -
* 2)</code> is in the high-surrogate range, then the
* supplementary code point value of the surrogate pair is
* returned. If the <code>char</code> value at <code>index -
* 1</code> is an unpaired low-surrogate or a high-surrogate, the
* surrogate value is returned.
*
* @param index the index following the code point that should be returned
* @return the Unicode code point value before the given index.
* @exception IndexOutOfBoundsException if the <code>index</code>
* argument is less than 1 or greater than the length
* of this string.
* @since 1.5
*/
public int codePointBefore(int index) {
    // The char examined is the one just before index, so that position
    // must exist; the exception still reports the caller's index.
    final int previous = index - 1;
    final boolean inRange = previous >= 0 && previous < value.length;
    if (!inRange) {
        throw new StringIndexOutOfBoundsException(index);
    }
    // 0 is passed as the start of the readable range.
    return Character.codePointBeforeImpl(value, index, 0);
}
/**
* Returns the number of Unicode code points in the specified text
* range of this <code>String</code>. The text range begins at the
* specified <code>beginIndex</code> and extends to the
* <code>char</code> at index <code>endIndex - 1</code>. Thus the
* length (in <code>char</code>s) of the text range is
* <code>endIndex-beginIndex</code>. Unpaired surrogates within
* the text range count as one code point each.
*
* @param beginIndex the index to the first <code>char</code> of
* the text range.
* @param endIndex the index after the last <code>char</code> of
* the text range.
* @return the number of Unicode code points in the specified text
* range
* @exception IndexOutOfBoundsException if the
* <code>beginIndex</code> is negative, or <code>endIndex</code>
* is larger than the length of this <code>String</code>, or
* <code>beginIndex</code> is larger than <code>endIndex</code>.
* @since 1.5
*/
public int codePointCount(int beginIndex, int endIndex) {
    // The range must satisfy 0 <= beginIndex <= endIndex <= value.length.
    final boolean validRange = beginIndex >= 0
            && endIndex <= value.length
            && beginIndex <= endIndex;
    if (!validRange) {
        throw new IndexOutOfBoundsException();
    }
    // The helper takes a start position and a char count.
    final int span = endIndex - beginIndex;
    return Character.codePointCountImpl(value, beginIndex, span);
}
/**
* Returns the index within this <code>String</code> that is
* offset from the given <code>index</code> by
* <code>codePointOffset</code> code points. Unpaired surrogates
* within the text range given by <code>index</code> and
* <code>codePointOffset</code> count as one code point each.
*
* @param index the index to be offset
* @param codePointOffset the offset in code points
* @return the index within this <code>String</code>
* @exception IndexOutOfBoundsException if <code>index</code>
* is negative or larger than the length of this
* <code>String</code>, or if <code>codePointOffset</code> is positive
* and the substring starting with <code>index</code> has fewer
* than <code>codePointOffset</code> code points,
* or if <code>codePointOffset</code> is negative and the substring
* before <code>index</code> has fewer than the absolute value
* of <code>codePointOffset</code> code points.
* @since 1.5
*/
public int offsetByCodePoints(int index, int codePointOffset) {
    final char[] chars = value;
    // index may equal the length (one past the last char) but not exceed it.
    final boolean inRange = index >= 0 && index <= chars.length;
    if (!inRange) {
        throw new IndexOutOfBoundsException();
    }
    // The whole array is passed as the range the helper may walk.
    return Character.offsetByCodePointsImpl(chars, 0, chars.length,
                                            index, codePointOffset);
}
/**
* Copy characters from this string into dst starting at dstBegin.
* This method doesn't perform any range checking.
*/
void getChars(char dst[], int dstBegin) {
// Copies all value.length chars into dst starting at dstBegin. Nothing is
// validated here, so any bounds failure comes from System.arraycopy itself.
System.arraycopy(value, 0, dst, dstBegin, value.length);
}
/**
* Copies characters from this string into the destination character
* array.
* <p>
* The first character to be copied is at index <code>srcBegin</code>;
* the last character to be copied is at index <code>srcEnd-1</code>
* (thus the total number of characters to be copied is
* <code>srcEnd-srcBegin</code>). The characters are copied into the
* subarray of <code>dst</code> starting at index <code>dstBegin</code>
* and ending at index:
* <p><blockquote><pre>
* dstbegin + (srcEnd-srcBegin) - 1
* </pre></blockquote>
*
* @param srcBegin index of the first character in the string
* to copy.
* @param srcEnd index after the last character in the string
* to copy.
* @param dst the destination array.
* @param dstBegin the start offset in the destination array.
* @exception IndexOutOfBoundsException If any of the following
* is true:
* <ul><li><code>srcBegin</code> is negative.
* <li><code>srcBegin</code> is greater than <code>srcEnd</code>
* <li><code>srcEnd</code> is greater than the length of this
* string
* <li><code>dstBegin</code> is negative
* <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
* <code>dst.length</code></ul>
*/
public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) {
    // Source range checks, in this order: negative start, end beyond the
    // string, start after end. Each reports a different value.
    if (srcBegin < 0) {
        throw new StringIndexOutOfBoundsException(srcBegin);
    }
    if (srcEnd > value.length) {
        throw new StringIndexOutOfBoundsException(srcEnd);
    }
    final int count = srcEnd - srcBegin;
    if (srcBegin > srcEnd) {
        throw new StringIndexOutOfBoundsException(count);
    }
    // The destination side (dst, dstBegin) is not checked here; it is left
    // to System.arraycopy.
    System.arraycopy(value, srcBegin, dst, dstBegin, count);
}
/**
* Copies characters from this string into the destination byte array. Each
* byte receives the 8 low-order bits of the corresponding character. The
* eight high-order bits of each character are not copied and do not
* participate in the transfer in any way.
*
* <p> The first character to be copied is at index {@code srcBegin}; the
* last character to be copied is at index {@code srcEnd-1}. The total
* number of characters to be copied is {@code srcEnd-srcBegin}. The
* characters, converted to bytes, are copied into the subarray of {@code
* dst} starting at index {@code dstBegin} and ending at index:
*
* <blockquote><pre>
* dstbegin + (srcEnd-srcBegin) - 1
* </pre></blockquote>
*
* @deprecated This method does not properly convert characters into
* bytes. As of JDK 1.1, the preferred way to do this is via the
* {@link #getBytes()} method, which uses the platform's default charset.
*
* @param srcBegin
* Index of the first character in the string to copy
*
* @param srcEnd
* Index after the last character in the string to copy
*
* @param dst
* The destination array
*
* @param dstBegin
* The start offset in the destination array
*
* @throws IndexOutOfBoundsException
* If any of the following is true:
* <ul>
* <li> {@code srcBegin} is negative
* <li> {@code srcBegin} is greater than {@code srcEnd}
* <li> {@code srcEnd} is greater than the length of this String
* <li> {@code dstBegin} is negative
* <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code
* dst.length}
* </ul>
*/
@Deprecated
public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) {
    // Source range checks, in this order: negative start, end beyond the
    // string, start after end. Each reports a different value.
    if (srcBegin < 0) {
        throw new StringIndexOutOfBoundsException(srcBegin);
    }
    if (srcEnd > value.length) {
        throw new StringIndexOutOfBoundsException(srcEnd);
    }
    if (srcBegin > srcEnd) {
        throw new StringIndexOutOfBoundsException(srcEnd - srcBegin);
    }
    // Local alias for the field, as in the original loop.
    final char[] chars = value;
    // The narrowing cast keeps only the low-order 8 bits of each char.
    for (int src = srcBegin, dest = dstBegin; src < srcEnd; src++, dest++) {
        dst[dest] = (byte) chars[src];
    }
}
/**
* Encodes this {@code String} into a sequence of bytes using the named
* charset, storing the result into a new byte array.
*
* <p> The behavior of this method when this string cannot be encoded in
* the given charset is unspecified. The {@link
* java.nio.charset.CharsetEncoder} class should be used when more control
* over the encoding process is required.
*
* @param charsetName
* The name of a supported {@linkplain java.nio.charset.Charset
* charset}
*
* @return The resultant byte array
*
* @throws UnsupportedEncodingException
* If the named charset is not supported
*
* @since JDK1.1
*/
public byte[] getBytes(String charsetName)
        throws UnsupportedEncodingException {
    // A null charset name is rejected up front with a message-less NPE.
    if (null == charsetName) {
        throw new NullPointerException();
    }
    // The complete char array is handed to the encoder.
    final char[] chars = value;
    return StringCoding.encode(charsetName, chars, 0, chars.length);
}
/**
* Encodes this {@code String} into a sequence of bytes using the given
* {@linkplain java.nio.charset.Charset charset}, storing the result into a
* new byte array.
*
* <p> This method always replaces malformed-input and unmappable-character
* sequences with this charset's default replacement byte array. The
* {@link java.nio.charset.CharsetEncoder} class should be used when more
* control over the encoding process is required.
*
* @param charset
* The {@linkplain java.nio.charset.Charset} to be used to encode
* the {@code String}
*
* @return The resultant byte array
*
* @since 1.6
*/
public byte[] getBytes(Charset charset) {
    // A null charset is rejected up front with a message-less NPE.
    if (null == charset) {
        throw new NullPointerException();
    }
    // The complete char array is handed to the encoder.
    final char[] chars = value;
    return StringCoding.encode(charset, chars, 0, chars.length);
}
/**
* Encodes this {@code String} into a sequence of bytes using the
* platform's default charset, storing the result into a new byte array.
*
* <p> The behavior of this method when this string cannot be encoded in
* the default charset is unspecified. The {@link
* java.nio.charset.CharsetEncoder} class should be used when more control
* over the encoding process is required.
*
* @return The resultant byte array
*
* @since JDK1.1
*/
public byte[] getBytes() {
// Encodes the complete char array; this encode overload takes no charset
// argument.
return StringCoding.encode(value, 0, value.length);
}
/**
* Compares this string to the specified object. The result is {@code
* true} if and only if the argument is not {@code null} and is a {@code
* String} object that represents the same sequence of characters as this
* object.
*
* @param anObject
* The object to compare this {@code String} against
*
* @return {@code true} if the given object represents a {@code String}
* equivalent to this string, {@code false} otherwise
*
* @see #compareTo(String)
* @see #equalsIgnoreCase(String)
*/
public boolean equals(Object anObject) {
    // Identity implies equality.
    if (this == anObject) {
        return true;
    }
    // Only another String can be equal; instanceof also rules out null.
    if (!(anObject instanceof String)) {
        return false;
    }
    final char[] mine = value;
    final char[] theirs = ((String) anObject).value;
    final int len = mine.length;
    if (len != theirs.length) {
        return false;
    }
    // Same length: compare char by char, stopping at the first difference.
    for (int i = 0; i < len; i++) {
        if (mine[i] != theirs[i]) {
            return false;
        }
    }
    return true;
}
/**
* Compares this string to the specified {@code StringBuffer}. The result
* is {@code true} if and only if this {@code String} represents the same
* sequence of characters as the specified {@code StringBuffer}.
*
* @param sb
* The {@code StringBuffer} to compare this {@code String} against
*
* @return {@code true} if this {@code String} represents the same
* sequence of characters as the specified {@code StringBuffer},
* {@code false} otherwise
*
* @since 1.4
*/
public boolean contentEquals(StringBuffer sb) {
// The buffer's monitor is held for the whole comparison.
synchronized (sb) {
// The cast selects the CharSequence overload rather than this method.
return contentEquals((CharSequence) sb);
}
}
/**
* Compares this string to the specified {@code CharSequence}. The result
* is {@code true} if and only if this {@code String} represents the same
* sequence of char values as the specified sequence.
*
* @param cs
* The sequence to compare this {@code String} against
*
* @return {@code true} if this {@code String} represents the same
* sequence of char values as the specified sequence, {@code
* false} otherwise
*
* @since 1.5
*/
public boolean contentEquals(CharSequence cs) {
    final char[] own = value;
    final int len = own.length;
    // Sequences of different length can never match.
    if (len != cs.length()) {
        return false;
    }
    // StringBuffer / StringBuilder: compare against their char array
    // directly instead of going through charAt.
    if (cs instanceof AbstractStringBuilder) {
        final char[] other = ((AbstractStringBuilder) cs).getValue();
        for (int i = 0; i < len; i++) {
            if (own[i] != other[i]) {
                return false;
            }
        }
        return true;
    }
    // Argument is a String: let equals decide the positive case.
    if (cs.equals(this)) {
        return true;
    }
    // Any other CharSequence: compare through its charAt.
    for (int i = 0; i < len; i++) {
        if (own[i] != cs.charAt(i)) {
            return false;
        }
    }
    return true;
}
/**
* Compares this {@code String} to another {@code String}, ignoring case
* considerations. Two strings are considered equal ignoring case if they
* are of the same length and corresponding characters in the two strings
* are equal ignoring case.
*
* <p> Two characters {@code c1} and {@code c2} are considered the same
* ignoring case if at least one of the following is true:
* <ul>
* <li> The two characters are the same (as compared by the
* {@code ==} operator)
* <li> Applying the method {@link
* java.lang.Character#toUpperCase(char)} to each character
* produces the same result
* <li> Applying the method {@link
* java.lang.Character#toLowerCase(char)} to each character
* produces the same result
* </ul>
*
* @param anotherString
* The {@code String} to compare this {@code String} against
*
* @return {@code true} if the argument is not {@code null} and it
* represents an equivalent {@code String} ignoring case; {@code
* false} otherwise
*
* @see #equals(Object)
*/
public boolean equalsIgnoreCase(String anotherString) {
    // Same object: equal without looking at the contents.
    if (this == anotherString) {
        return true;
    }
    // null never matches.
    if (anotherString == null) {
        return false;
    }
    // Different lengths never match.
    if (anotherString.value.length != value.length) {
        return false;
    }
    // Compare the full range with the ignore-case flag set.
    return regionMatches(true, 0, anotherString, 0, value.length);
}
/**
* Compares two strings lexicographically.
* The comparison is based on the Unicode value of each character in
* the strings. The character sequence represented by this
* <code>String</code> object is compared lexicographically to the
* character sequence represented by the argument string. The result is
* a negative integer if this <code>String</code> object
* lexicographically precedes the argument string. The result is a
* positive integer if this <code>String</code> object lexicographically
* follows the argument string. The result is zero if the strings
* are equal; <code>compareTo</code> returns <code>0</code> exactly when
* the {@link #equals(Object)} method would return <code>true</code>.
* <p>
* This is the definition of lexicographic ordering. If two strings are
* different, then either they have different characters at some index
* that is a valid index for both strings, or their lengths are different,
* or both. If they have different characters at one or more index
* positions, let <i>k</i> be the smallest such index; then the string
* whose character at position <i>k</i> has the smaller value, as
* determined by using the &lt; operator, lexicographically precedes the
* other string. In this case, <code>compareTo</code> returns the
* difference of the two character values at position <code>k</code> in
* the two string -- that is, the value:
* <blockquote><pre>
* this.charAt(k)-anotherString.charAt(k)
* </pre></blockquote>
* If there is no index position at which they differ, then the shorter
* string lexicographically precedes the longer string. In this case,
* <code>compareTo</code> returns the difference of the lengths of the
* strings -- that is, the value:
* <blockquote><pre>
* this.length()-anotherString.length()
* </pre></blockquote>
*
* @param anotherString the <code>String</code> to be compared.
* @return the value <code>0</code> if the argument string is equal to
* this string; a value less than <code>0</code> if this string
* is lexicographically less than the string argument; and a
* value greater than <code>0</code> if this string is
* lexicographically greater than the string argument.
*/
public int compareTo(String anotherString) {
    final char[] mine = value;
    final char[] theirs = anotherString.value;
    final int myLen = mine.length;
    final int theirLen = theirs.length;
    // Only the positions present in both strings can be compared.
    final int shared = Math.min(myLen, theirLen);
    for (int pos = 0; pos < shared; pos++) {
        final char a = mine[pos];
        final char b = theirs[pos];
        if (a != b) {
            // First differing position decides; chars widen to int, so the
            // subtraction cannot overflow.
            return a - b;
        }
    }
    // One string is a prefix of the other (or they are equal): the length
    // difference decides.
    return myLen - theirLen;
}
/**
 * A Comparator that orders <code>String</code> objects as by
 * <code>compareToIgnoreCase</code>. This comparator is serializable, and
 * deserializing it yields this same shared instance.
 * <p>
 * Note that this Comparator does <em>not</em> take locale into account,
 * and will result in an unsatisfactory ordering for certain locales.
 * The java.text package provides <em>Collators</em> to allow
 * locale-sensitive ordering.
 *
 * @see java.text.Collator#compare(String, String)
 * @since 1.2
 */
public static final Comparator<String> CASE_INSENSITIVE_ORDER
        = new CaseInsensitiveComparator();

/**
 * Comparator behind <code>CASE_INSENSITIVE_ORDER</code>. Two characters are
 * treated as equal when they are identical, when their
 * <code>Character.toUpperCase</code> forms are identical, or when the
 * <code>Character.toLowerCase</code> forms of those upper-cased characters
 * are identical.
 */
private static class CaseInsensitiveComparator
        implements Comparator<String>, java.io.Serializable {
    // use serialVersionUID from JDK 1.2.2 for interoperability
    private static final long serialVersionUID = 8575799808933029326L;

    /**
     * Compares two strings char by char, ignoring case differences.
     *
     * @param s1 the first string.
     * @param s2 the second string.
     * @return the difference of the first pair of case-normalized
     *         characters that differ, or the difference of the lengths
     *         if one string is a case-insensitive prefix of the other.
     * @throws NullPointerException if either argument is <code>null</code>.
     */
    @Override
    public int compare(String s1, String s2) {
        int n1 = s1.length();
        int n2 = s2.length();
        int min = Math.min(n1, n2);
        for (int i = 0; i < min; i++) {
            char c1 = s1.charAt(i);
            char c2 = s2.charAt(i);
            if (c1 != c2) {
                c1 = Character.toUpperCase(c1);
                c2 = Character.toUpperCase(c2);
                if (c1 != c2) {
                    // Second pass: lower-case the upper-cased forms before
                    // declaring the characters different.
                    c1 = Character.toLowerCase(c1);
                    c2 = Character.toLowerCase(c2);
                    if (c1 != c2) {
                        // No overflow because of numeric promotion
                        return c1 - c2;
                    }
                }
            }
        }
        return n1 - n2;
    }

    /**
     * Replaces the de-serialized object with the canonical
     * <code>CASE_INSENSITIVE_ORDER</code> instance, so that the comparator
     * stays a singleton across serialization round trips.
     */
    private Object readResolve() {
        return CASE_INSENSITIVE_ORDER;
    }
}
/**
* Compares two strings lexicographically, ignoring case
* differences. This method returns an integer whose sign is that of
* calling <code>compareTo</code> with normalized versions of the strings
* where case differences have been eliminated by calling
* <code>Character.toLowerCase(Character.toUpperCase(character))</code> on
* each character.
* <p>
* Note that this method does <em>not</em> take locale into account,
* and will result in an unsatisfactory ordering for certain locales.
* The java.text package provides <em>collators</em> to allow
* locale-sensitive ordering.
*
* @param str the <code>String</code> to be compared.
* @return a negative integer, zero, or a positive integer as the
* specified String is greater than, equal to, or less
* than this String, ignoring case considerations.
* @see java.text.Collator#compare(String, String)
* @since 1.2
*/
public int compareToIgnoreCase(String str) {
return str);
}
/**
 * Tests whether a region of this string is equal, char for char, to a
 * region of another string.
 * <p>
 * The region of this <code>String</code> starts at index
 * <code>toffset</code>; the region of <code>other</code> starts at index
 * <code>ooffset</code>; both regions span <code>len</code> characters.
 * The result is <code>false</code> if and only if at least one of the
 * following holds:
 * <ul>
 * <li><code>toffset</code> is negative.
 * <li><code>ooffset</code> is negative.
 * <li><code>toffset+len</code> is greater than the length of this
 * <code>String</code>.
 * <li><code>ooffset+len</code> is greater than the length of
 * <code>other</code>.
 * <li>There is some nonnegative integer <i>k</i> less than
 * <code>len</code> such that
 * <code>this.charAt(toffset+<i>k</i>) != other.charAt(ooffset+<i>k</i>)</code>.
 * </ul>
 * Out-of-range arguments therefore produce <code>false</code> rather than
 * an exception. If <code>len</code> is not positive, no characters are
 * compared.
 *
 * @param toffset the starting offset of the subregion in this string.
 * @param other the string argument.
 * @param ooffset the starting offset of the subregion in the string
 *        argument.
 * @param len the number of characters to compare.
 * @return <code>true</code> if the specified subregion of this string
 *         exactly matches the specified subregion of the string argument;
 *         <code>false</code> otherwise.
 * @throws NullPointerException if <code>other</code> is <code>null</code>.
 */
public boolean regionMatches(int toffset, String other, int ooffset,
        int len) {
    final char[] mine = value;
    final char[] theirs = other.value;
    // The limits are computed in long arithmetic because toffset, ooffset,
    // or len may be close to Integer.MAX_VALUE, where int math would overflow.
    final long myLimit = (long) mine.length - len;
    final long theirLimit = (long) theirs.length - len;
    if (toffset < 0 || ooffset < 0
            || toffset > myLimit || ooffset > theirLimit) {
        return false;
    }
    for (int i = 0; i < len; i++) {
        if (mine[toffset + i] != theirs[ooffset + i]) {
            return false;
        }
    }
    return true;
}
/**
 * Tests whether a region of this string matches a region of another
 * string, optionally ignoring case.
 * <p>
 * The region of this <code>String</code> starts at index
 * <code>toffset</code>; the region of <code>other</code> starts at index
 * <code>ooffset</code>; both regions span <code>len</code> characters.
 * The result is <code>false</code> if and only if at least one of the
 * following holds:
 * <ul>
 * <li><code>toffset</code> is negative.
 * <li><code>ooffset</code> is negative.
 * <li><code>toffset+len</code> is greater than the length of this
 * <code>String</code>.
 * <li><code>ooffset+len</code> is greater than the length of
 * <code>other</code>.
 * <li><code>ignoreCase</code> is <code>false</code> and there is some
 * nonnegative integer <i>k</i> less than <code>len</code> such that:
 * <blockquote><pre>
 * this.charAt(toffset+k) != other.charAt(ooffset+k)
 * </pre></blockquote>
 * <li><code>ignoreCase</code> is <code>true</code> and there is some
 * nonnegative integer <i>k</i> less than <code>len</code> for which the
 * two characters differ, their <code>Character.toUpperCase</code> forms
 * differ, and the <code>Character.toLowerCase</code> forms of those
 * upper-cased characters differ as well.
 * </ul>
 * If <code>len</code> is not positive, no characters are compared.
 *
 * @param ignoreCase if <code>true</code>, ignore case when comparing
 *        characters.
 * @param toffset the starting offset of the subregion in this
 *        string.
 * @param other the string argument.
 * @param ooffset the starting offset of the subregion in the string
 *        argument.
 * @param len the number of characters to compare.
 * @return <code>true</code> if the specified subregion of this string
 *         matches the specified subregion of the string argument;
 *         <code>false</code> otherwise. Whether the matching is exact
 *         or case insensitive depends on the <code>ignoreCase</code>
 *         argument.
 * @throws NullPointerException if <code>other</code> is <code>null</code>.
 */
public boolean regionMatches(boolean ignoreCase, int toffset,
        String other, int ooffset, int len) {
    final char[] mine = value;
    final char[] theirs = other.value;
    // The limits are computed in long arithmetic because toffset, ooffset,
    // or len may be close to Integer.MAX_VALUE, where int math would overflow.
    final long myLimit = (long) mine.length - len;
    final long theirLimit = (long) theirs.length - len;
    if (toffset < 0 || ooffset < 0
            || toffset > myLimit || ooffset > theirLimit) {
        return false;
    }
    for (int i = 0; i < len; i++) {
        final char left = mine[toffset + i];
        final char right = theirs[ooffset + i];
        if (left == right) {
            continue;
        }
        if (!ignoreCase) {
            // Exact matching requested and the characters differ.
            return false;
        }
        // Case may be ignored: first compare the upper-cased forms.
        final char upperLeft = Character.toUpperCase(left);
        final char upperRight = Character.toUpperCase(right);
        // Conversion to uppercase does not work properly for the Georgian
        // alphabet, so the lower-cased forms of the upper-cased characters
        // are compared as a last check before reporting a mismatch.
        if (upperLeft != upperRight
                && Character.toLowerCase(upperLeft)
                        != Character.toLowerCase(upperRight)) {
            return false;
        }
    }
    return true;
}