关于操作字符串的简单讨论。
添加时间:2013-7-9 点击量:
起因是之前的阿里面试:按照面试官的提示,重写了三遍方法才写对,教条?不过我对此有别的看法。
如下代码给出了三种不同的截取方式,前面两种都比面试官所说的方法快,而且速度不在同一个量级。
而以字节操作又比以字符操作略慢,其原因在于以字节操作最后需要 new String,从源码可知这会导致重新按字符集解码。
由此可知,要快速地操作字符串,最根本的手段是以 Java 字符(char)的形式进行操作,其次是字节形式;在字节与字符之间来回转换是性能最差的方式。
import java.io.UnsupportedEncodingException;
import java.util.Random;
public class Example {
/
<pre>
以字符字节的情势操纵字节截取。
</pre>
@param src
@param length
@param charset
@return
@throws UnsupportedEncodingException
/
public static String subStringInChar(String src, int length, String charset) throws UnsupportedEncodingException {
int srcLength = src.length();
int i = 0;
int len = 0;
int rollback = 0;
while (true) {
if (len >= length || length>=srcLength) {
break;
}
if (src.charAt(i) < 256) {
rollback=0;
len += 1;
} else {
rollback=1;
len += 2;
}
i+=1;
}
if (len!=length){
i-=rollback;
}
return src.substring(0, i);
}
/
<pre>
逐字节截取字符。
</pre>
@param src
@param length
@param charset
@return
@throws UnsupportedEncodingException
/
public static String subString(String src, int length, String charset) throws UnsupportedEncodingException {
if (charset == null) {
charset = GBK;
}
int firstStartScope = 129 - 1;
int firstEndScope = 254 + 1;
int secondStartScope = 64 - 1;
int secondEndScope = 254 + 1;
byte[] bytes = src.getBytes(charset);
int i = 0;
for (; i < bytes.length;) {
int b1 = bytes[i] & 0 xFF;
int b2 = bytes[i+1] & 0 xFF;
if (b1 > firstStartScope && b1 < firstEndScope && b2 > secondStartScope && b2 < secondEndScope) {
if (i+1==length){
i=length-1;
break;
}else if (i+1>length){
i=length;
break;
}
i+=2;
}else{
i+=1;
if (i==length){
break;
}
}
}
return new String(bytes, 0, i, charset);
}
/
<pre>
口试官承认的操纵办法
</pre>
@param str
@param subSLength
@param charset
@return
@throws UnsupportedEncodingException
/
public static String subStr(String str, int subSLength, String charset) throws UnsupportedEncodingException {
if (charset == null) {
charset = GBK;
}
if (str == null) {
return ;
} else {
int tempSubLength = subSLength;//截取字节数
int strLength = str.length();
String subStr = str.substring(0, strLength < subSLength ? strLength : subSLength);//截取的子串
int subStrByetsL = subStr.getBytes(charset).length;//截取子串的字节长度
while (subStrByetsL > tempSubLength) {
int subSLengthTemp = --subSLength;
subStr = str.substring(0, subSLengthTemp > strLength ? strLength : subSLengthTemp);
subStrByetsL = subStr.getBytes(charset).length;
//subStrByetsL = subStr.getBytes().length;
}
return subStr;
}
}
private static Random random = new Random();
public static String getRandomString() {
String base = abcdefghijklmnopqrstuvwxyz0123456789;
int randomLength = 10000;
int strLength = random.nextInt(randomLength);
while (strLength < randomLength / 2) {
strLength = random.nextInt(randomLength);
}
char[] chars = new char[strLength];
for (int i = 0; i < chars.length; i++) {
char c = (char) (0 x4e00 + (int) (Math.random() (0 x9fa5 - 0 x4e00 + 1)));
if (random.nextInt(3) > 0) {
c = base.charAt(random.nextInt(base.length()));
}
chars[i] = c;
}
return new String(chars);
}
public static void main(String[] args) throws UnsupportedEncodingException {
long start = System.currentTimeMillis();
long s1 = 0;
long s2 = 0;
long s3 = 0;
Random random = new Random();
for (int i = 0; i < 100; i++) {
String str = getRandomString();
// String str = 我ABC汗;
// System.out.println(str);
if (str.length() == 0) {
continue;
}
start = System.currentTimeMillis();
int randomLeng = random.nextInt(str.length());
// int randomLeng = 6;
String str1 = subString(str, randomLeng, null);
// System.out.println(str1);
s1 += (System.currentTimeMillis() - start);
start = System.currentTimeMillis();
String str2 = subStr(str, randomLeng, null);
// System.out.println(str2);
s2 += (System.currentTimeMillis() - start);
start = System.currentTimeMillis();
String str3 = subStringInChar(str, randomLeng, null);
// System.out.println(str3);
s3 += (System.currentTimeMillis() - start);
if (!str1.equals(str2) || !str2.equals(str3) || !str1.equals(str3)) {
System.out.println(i);
System.out.println(str);
System.out.println(randomLeng);
System.out.println(str1);
System.out.println(str2);
System.out.println(str3);
break;
}
}
System.out.println(s1);
System.out.println(s2);
System.out.println(s3);
}
}
容易发怒的意思就是: 别人做了蠢事, 然后我们代替他们, 表现出笨蛋的样子。—— 蔡康永
起因是之前的阿里面试:按照面试官的提示,重写了三遍方法才写对,教条?不过我对此有别的看法。
如下代码给出了三种不同的截取方式,前面两种都比面试官所说的方法快,而且速度不在同一个量级。
而以字节操作又比以字符操作略慢,其原因在于以字节操作最后需要 new String,从源码可知这会导致重新按字符集解码。
由此可知,要快速地操作字符串,最根本的手段是以 Java 字符(char)的形式进行操作,其次是字节形式;在字节与字符之间来回转换是性能最差的方式。
import java.io.UnsupportedEncodingException;
import java.util.Random;
public class Example {
/
<pre>
以字符字节的情势操纵字节截取。
</pre>
@param src
@param length
@param charset
@return
@throws UnsupportedEncodingException
/
public static String subStringInChar(String src, int length, String charset) throws UnsupportedEncodingException {
int srcLength = src.length();
int i = 0;
int len = 0;
int rollback = 0;
while (true) {
if (len >= length || length>=srcLength) {
break;
}
if (src.charAt(i) < 256) {
rollback=0;
len += 1;
} else {
rollback=1;
len += 2;
}
i+=1;
}
if (len!=length){
i-=rollback;
}
return src.substring(0, i);
}
/
<pre>
逐字节截取字符。
</pre>
@param src
@param length
@param charset
@return
@throws UnsupportedEncodingException
/
public static String subString(String src, int length, String charset) throws UnsupportedEncodingException {
if (charset == null) {
charset = GBK;
}
int firstStartScope = 129 - 1;
int firstEndScope = 254 + 1;
int secondStartScope = 64 - 1;
int secondEndScope = 254 + 1;
byte[] bytes = src.getBytes(charset);
int i = 0;
for (; i < bytes.length;) {
int b1 = bytes[i] & 0 xFF;
int b2 = bytes[i+1] & 0 xFF;
if (b1 > firstStartScope && b1 < firstEndScope && b2 > secondStartScope && b2 < secondEndScope) {
if (i+1==length){
i=length-1;
break;
}else if (i+1>length){
i=length;
break;
}
i+=2;
}else{
i+=1;
if (i==length){
break;
}
}
}
return new String(bytes, 0, i, charset);
}
/
<pre>
口试官承认的操纵办法
</pre>
@param str
@param subSLength
@param charset
@return
@throws UnsupportedEncodingException
/
public static String subStr(String str, int subSLength, String charset) throws UnsupportedEncodingException {
if (charset == null) {
charset = GBK;
}
if (str == null) {
return ;
} else {
int tempSubLength = subSLength;//截取字节数
int strLength = str.length();
String subStr = str.substring(0, strLength < subSLength ? strLength : subSLength);//截取的子串
int subStrByetsL = subStr.getBytes(charset).length;//截取子串的字节长度
while (subStrByetsL > tempSubLength) {
int subSLengthTemp = --subSLength;
subStr = str.substring(0, subSLengthTemp > strLength ? strLength : subSLengthTemp);
subStrByetsL = subStr.getBytes(charset).length;
//subStrByetsL = subStr.getBytes().length;
}
return subStr;
}
}
private static Random random = new Random();
public static String getRandomString() {
String base = abcdefghijklmnopqrstuvwxyz0123456789;
int randomLength = 10000;
int strLength = random.nextInt(randomLength);
while (strLength < randomLength / 2) {
strLength = random.nextInt(randomLength);
}
char[] chars = new char[strLength];
for (int i = 0; i < chars.length; i++) {
char c = (char) (0 x4e00 + (int) (Math.random() (0 x9fa5 - 0 x4e00 + 1)));
if (random.nextInt(3) > 0) {
c = base.charAt(random.nextInt(base.length()));
}
chars[i] = c;
}
return new String(chars);
}
public static void main(String[] args) throws UnsupportedEncodingException {
long start = System.currentTimeMillis();
long s1 = 0;
long s2 = 0;
long s3 = 0;
Random random = new Random();
for (int i = 0; i < 100; i++) {
String str = getRandomString();
// String str = 我ABC汗;
// System.out.println(str);
if (str.length() == 0) {
continue;
}
start = System.currentTimeMillis();
int randomLeng = random.nextInt(str.length());
// int randomLeng = 6;
String str1 = subString(str, randomLeng, null);
// System.out.println(str1);
s1 += (System.currentTimeMillis() - start);
start = System.currentTimeMillis();
String str2 = subStr(str, randomLeng, null);
// System.out.println(str2);
s2 += (System.currentTimeMillis() - start);
start = System.currentTimeMillis();
String str3 = subStringInChar(str, randomLeng, null);
// System.out.println(str3);
s3 += (System.currentTimeMillis() - start);
if (!str1.equals(str2) || !str2.equals(str3) || !str1.equals(str3)) {
System.out.println(i);
System.out.println(str);
System.out.println(randomLeng);
System.out.println(str1);
System.out.println(str2);
System.out.println(str3);
break;
}
}
System.out.println(s1);
System.out.println(s2);
System.out.println(s3);
}
}
容易发怒的意思就是: 别人做了蠢事, 然后我们代替他们, 表现出笨蛋的样子。—— 蔡康永