关于操作字符串的简单讨论。
               添加时间:2013-7-9 点击量: 
 
              起因是之前的阿里面试:按照面试官的提示,重写了三遍方法才写对。教条?不过我对此有不同的见解。
如下代码给出了三种不同的截取方式,前面两种都比面试官所说的方法快,而且速度不在同一个量级。
而按字节操作又比按字符操作略慢,原因在于按字节操作时先要用 getBytes 按字符集编码,最后还需要 new String,从源码可知这会导致重新按字符集解码。
由此可知,要快速地操作字符串,最根本的手段是以 Java 字符(char)形式进行操作,其次是字节形式;在字节与字符之间来回转换是性能最差的方式。
import java.io.UnsupportedEncodingException;
import java.util.Random;
public class Example {

    /** Charset used whenever a caller passes {@code null} as the charset name. */
    private static final String DEFAULT_CHARSET = "GBK";

    /** Characters that {@link #getRandomString()} uses for the non-CJK part of its output. */
    private static final String BASE_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789";

    /** First code point of the range {@link #getRandomString()} draws CJK characters from. */
    private static final int CJK_FIRST = 0x4e00;

    /** Last code point (inclusive) of the range {@link #getRandomString()} draws CJK characters from. */
    private static final int CJK_LAST = 0x9fa5;

    /** Shared random source for the test-data generator and the benchmark. */
    private static final Random RANDOM = new Random();

    /**
     * Truncates a string to a byte budget by walking its {@code char}s, without encoding anything.
     *
     * <p>The byte width of each character is estimated from its value only: a {@code char}
     * below 256 counts as one byte, any other {@code char} counts as two. A character that
     * would exceed the budget is dropped as a whole, never split.
     *
     * @param src     the string to truncate; {@code null} yields an empty string
     * @param length  the maximum number of (estimated) bytes to keep; values {@code <= 0}
     *                yield an empty string
     * @param charset not consulted by this method; kept so that all three truncation
     *                methods share one signature
     * @return the longest prefix of {@code src} whose estimated width is at most {@code length}
     * @throws UnsupportedEncodingException never thrown here; declared for signature parity
     */
    public static String subStringInChar(String src, int length, String charset) throws UnsupportedEncodingException {
        if (src == null || length <= 0) {
            return "";
        }
        final int srcLength = src.length();
        int end = 0;   // number of chars kept so far
        int width = 0; // estimated byte width of those chars
        // Stop at the end of the string (not when length >= srcLength, which used to
        // return "" for any budget at least as large as the char count).
        while (end < srcLength) {
            int charWidth = src.charAt(end) < 256 ? 1 : 2;
            if (width + charWidth > length) {
                break; // the next character does not fit completely
            }
            width += charWidth;
            end++;
        }
        return src.substring(0, end);
    }

    /**
     * Truncates a string to a byte budget by encoding it once and scanning the bytes.
     *
     * <p>Character boundaries are detected with fixed double-byte ranges (lead byte
     * 0x81-0xFE followed by a trail byte 0x40-0xFE). The scan is therefore only meaningful
     * for encodings that use this lead/trail layout; the ranges are not derived from
     * {@code charset}. NOTE(review): the ranges look like GBK's two-byte layout - confirm
     * before using this method with any other charset.
     *
     * @param src     the string to truncate; {@code null} yields an empty string
     * @param length  the maximum number of encoded bytes to keep; values {@code <= 0}
     *                yield an empty string
     * @param charset the charset name used for encoding and decoding; {@code null} means GBK
     * @return the longest prefix of {@code src} that fits into {@code length} bytes without
     *         cutting a double-byte character in half
     * @throws UnsupportedEncodingException if {@code charset} is not a supported charset name
     */
    public static String subString(String src, int length, String charset) throws UnsupportedEncodingException {
        if (charset == null) {
            charset = DEFAULT_CHARSET;
        }
        if (src == null || length <= 0) {
            return "";
        }

        byte[] bytes = src.getBytes(charset);
        // Never scan past the budget or past the data.
        final int limit = Math.min(length, bytes.length);
        int i = 0;
        while (i < limit) {
            int width = isDoubleByteAt(bytes, i) ? 2 : 1;
            if (i + width > limit) {
                break; // a double-byte character straddles the limit: drop it entirely
            }
            i += width;
        }
        // Decoding the kept bytes is the extra cost compared with subStringInChar.
        return new String(bytes, 0, i, charset);
    }

    /**
     * Tells whether the bytes at {@code index} and {@code index + 1} form a double-byte
     * character according to the fixed lead/trail ranges used by {@link #subString}.
     * A lead byte in the last position has no trail byte and is treated as single-byte.
     */
    private static boolean isDoubleByteAt(byte[] bytes, int index) {
        if (index + 1 >= bytes.length) {
            return false;
        }
        int lead = bytes[index] & 0xFF;
        int trail = bytes[index + 1] & 0xFF;
        return lead >= 0x81 && lead <= 0xFE && trail >= 0x40 && trail <= 0xFE;
    }

    /**
     * Truncates a string to a byte budget by repeatedly re-encoding shorter prefixes
     * (the approach the interviewer accepted).
     *
     * <p>Starts from the first {@code min(str.length(), subSLength)} chars and drops one
     * trailing char at a time until the encoded prefix fits.
     *
     * @param str        the string to truncate; {@code null} yields an empty string
     * @param subSLength the maximum number of encoded bytes to keep; values {@code <= 0}
     *                   yield an empty string
     * @param charset    the charset name used for encoding; {@code null} means GBK
     * @return the longest prefix of {@code str} whose encoding is at most {@code subSLength} bytes
     * @throws UnsupportedEncodingException if {@code charset} is not a supported charset name
     */
    public static String subStr(String str, int subSLength, String charset) throws UnsupportedEncodingException {
        if (charset == null) {
            charset = DEFAULT_CHARSET;
        }
        if (str == null || subSLength <= 0) {
            return "";
        }

        // A prefix of n chars encodes to at least n bytes, so n never needs to exceed the
        // budget; clamping to the string length avoids re-encoding the same prefix repeatedly.
        int charCount = Math.min(str.length(), subSLength);
        String candidate = str.substring(0, charCount);
        while (candidate.getBytes(charset).length > subSLength) {
            charCount--;
            candidate = str.substring(0, charCount);
        }
        return candidate;
    }

    /**
     * Builds a random test string of 5000 to 9999 chars.
     *
     * <p>Each char is, with probability 2/3, a lowercase letter or digit, and otherwise a
     * character drawn uniformly from U+4E00..U+9FA5.
     *
     * @return the generated string, never empty
     */
    public static String getRandomString() {
        final int maxLength = 10000;
        final int minLength = maxLength / 2;
        // Uniform in [minLength, maxLength): same distribution as re-rolling until >= minLength.
        int strLength = minLength + RANDOM.nextInt(maxLength - minLength);

        char[] chars = new char[strLength];
        for (int i = 0; i < chars.length; i++) {
            if (RANDOM.nextInt(3) > 0) {
                chars[i] = BASE_CHARS.charAt(RANDOM.nextInt(BASE_CHARS.length()));
            } else {
                chars[i] = (char) (CJK_FIRST + RANDOM.nextInt(CJK_LAST - CJK_FIRST + 1));
            }
        }
        return new String(chars);
    }

    /**
     * Runs the three truncation methods on 100 random strings, checks that they agree and
     * prints the accumulated time of each method in milliseconds, in the order
     * {@link #subString}, {@link #subStr}, {@link #subStringInChar}.
     *
     * <p>On the first disagreement the iteration index, the input, the byte budget and the
     * three results are printed and the run stops.
     *
     * @param args unused
     * @throws UnsupportedEncodingException if the default charset is not supported by the JVM
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        // Accumulate nanoseconds: single calls are far shorter than one millisecond, so
        // summing currentTimeMillis() deltas would mostly add zeros.
        long s1 = 0;
        long s2 = 0;
        long s3 = 0;
        for (int i = 0; i < 100; i++) {
            String str = getRandomString();
            if (str.length() == 0) {
                continue;
            }
            int randomLeng = RANDOM.nextInt(str.length());

            long start = System.nanoTime();
            String str1 = subString(str, randomLeng, null);
            s1 += System.nanoTime() - start;

            start = System.nanoTime();
            String str2 = subStr(str, randomLeng, null);
            s2 += System.nanoTime() - start;

            start = System.nanoTime();
            String str3 = subStringInChar(str, randomLeng, null);
            s3 += System.nanoTime() - start;

            if (!str1.equals(str2) || !str2.equals(str3)) {
                System.out.println(i);
                System.out.println(str);
                System.out.println(randomLeng);
                System.out.println(str1);
                System.out.println(str2);
                System.out.println(str3);
                break;
            }
        }
        System.out.println(s1 / 1000000L);
        System.out.println(s2 / 1000000L);
        System.out.println(s3 / 1000000L);
    }
}
容易发怒的意思就是: 别人做了蠢事, 然后我们代替他们, 表现出笨蛋的样子。—— 蔡康永
                     
                  
     
  
 
    
    
起因是之前的阿里面试:按照面试官的提示,重写了三遍方法才写对。教条?不过我对此有不同的见解。
如下代码给出了三种不同的截取方式,前面两种都比面试官所说的方法快,而且速度不在同一个量级。
而按字节操作又比按字符操作略慢,原因在于按字节操作时先要用 getBytes 按字符集编码,最后还需要 new String,从源码可知这会导致重新按字符集解码。
由此可知,要快速地操作字符串,最根本的手段是以 Java 字符(char)形式进行操作,其次是字节形式;在字节与字符之间来回转换是性能最差的方式。
import java.io.UnsupportedEncodingException;
import java.util.Random;
public class Example {

    /** Charset used whenever a caller passes {@code null} as the charset name. */
    private static final String DEFAULT_CHARSET = "GBK";

    /** Characters that {@link #getRandomString()} uses for the non-CJK part of its output. */
    private static final String BASE_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789";

    /** First code point of the range {@link #getRandomString()} draws CJK characters from. */
    private static final int CJK_FIRST = 0x4e00;

    /** Last code point (inclusive) of the range {@link #getRandomString()} draws CJK characters from. */
    private static final int CJK_LAST = 0x9fa5;

    /** Shared random source for the test-data generator and the benchmark. */
    private static final Random RANDOM = new Random();

    /**
     * Truncates a string to a byte budget by walking its {@code char}s, without encoding anything.
     *
     * <p>The byte width of each character is estimated from its value only: a {@code char}
     * below 256 counts as one byte, any other {@code char} counts as two. A character that
     * would exceed the budget is dropped as a whole, never split.
     *
     * @param src     the string to truncate; {@code null} yields an empty string
     * @param length  the maximum number of (estimated) bytes to keep; values {@code <= 0}
     *                yield an empty string
     * @param charset not consulted by this method; kept so that all three truncation
     *                methods share one signature
     * @return the longest prefix of {@code src} whose estimated width is at most {@code length}
     * @throws UnsupportedEncodingException never thrown here; declared for signature parity
     */
    public static String subStringInChar(String src, int length, String charset) throws UnsupportedEncodingException {
        if (src == null || length <= 0) {
            return "";
        }
        final int srcLength = src.length();
        int end = 0;   // number of chars kept so far
        int width = 0; // estimated byte width of those chars
        // Stop at the end of the string (not when length >= srcLength, which used to
        // return "" for any budget at least as large as the char count).
        while (end < srcLength) {
            int charWidth = src.charAt(end) < 256 ? 1 : 2;
            if (width + charWidth > length) {
                break; // the next character does not fit completely
            }
            width += charWidth;
            end++;
        }
        return src.substring(0, end);
    }

    /**
     * Truncates a string to a byte budget by encoding it once and scanning the bytes.
     *
     * <p>Character boundaries are detected with fixed double-byte ranges (lead byte
     * 0x81-0xFE followed by a trail byte 0x40-0xFE). The scan is therefore only meaningful
     * for encodings that use this lead/trail layout; the ranges are not derived from
     * {@code charset}. NOTE(review): the ranges look like GBK's two-byte layout - confirm
     * before using this method with any other charset.
     *
     * @param src     the string to truncate; {@code null} yields an empty string
     * @param length  the maximum number of encoded bytes to keep; values {@code <= 0}
     *                yield an empty string
     * @param charset the charset name used for encoding and decoding; {@code null} means GBK
     * @return the longest prefix of {@code src} that fits into {@code length} bytes without
     *         cutting a double-byte character in half
     * @throws UnsupportedEncodingException if {@code charset} is not a supported charset name
     */
    public static String subString(String src, int length, String charset) throws UnsupportedEncodingException {
        if (charset == null) {
            charset = DEFAULT_CHARSET;
        }
        if (src == null || length <= 0) {
            return "";
        }

        byte[] bytes = src.getBytes(charset);
        // Never scan past the budget or past the data.
        final int limit = Math.min(length, bytes.length);
        int i = 0;
        while (i < limit) {
            int width = isDoubleByteAt(bytes, i) ? 2 : 1;
            if (i + width > limit) {
                break; // a double-byte character straddles the limit: drop it entirely
            }
            i += width;
        }
        // Decoding the kept bytes is the extra cost compared with subStringInChar.
        return new String(bytes, 0, i, charset);
    }

    /**
     * Tells whether the bytes at {@code index} and {@code index + 1} form a double-byte
     * character according to the fixed lead/trail ranges used by {@link #subString}.
     * A lead byte in the last position has no trail byte and is treated as single-byte.
     */
    private static boolean isDoubleByteAt(byte[] bytes, int index) {
        if (index + 1 >= bytes.length) {
            return false;
        }
        int lead = bytes[index] & 0xFF;
        int trail = bytes[index + 1] & 0xFF;
        return lead >= 0x81 && lead <= 0xFE && trail >= 0x40 && trail <= 0xFE;
    }

    /**
     * Truncates a string to a byte budget by repeatedly re-encoding shorter prefixes
     * (the approach the interviewer accepted).
     *
     * <p>Starts from the first {@code min(str.length(), subSLength)} chars and drops one
     * trailing char at a time until the encoded prefix fits.
     *
     * @param str        the string to truncate; {@code null} yields an empty string
     * @param subSLength the maximum number of encoded bytes to keep; values {@code <= 0}
     *                   yield an empty string
     * @param charset    the charset name used for encoding; {@code null} means GBK
     * @return the longest prefix of {@code str} whose encoding is at most {@code subSLength} bytes
     * @throws UnsupportedEncodingException if {@code charset} is not a supported charset name
     */
    public static String subStr(String str, int subSLength, String charset) throws UnsupportedEncodingException {
        if (charset == null) {
            charset = DEFAULT_CHARSET;
        }
        if (str == null || subSLength <= 0) {
            return "";
        }

        // A prefix of n chars encodes to at least n bytes, so n never needs to exceed the
        // budget; clamping to the string length avoids re-encoding the same prefix repeatedly.
        int charCount = Math.min(str.length(), subSLength);
        String candidate = str.substring(0, charCount);
        while (candidate.getBytes(charset).length > subSLength) {
            charCount--;
            candidate = str.substring(0, charCount);
        }
        return candidate;
    }

    /**
     * Builds a random test string of 5000 to 9999 chars.
     *
     * <p>Each char is, with probability 2/3, a lowercase letter or digit, and otherwise a
     * character drawn uniformly from U+4E00..U+9FA5.
     *
     * @return the generated string, never empty
     */
    public static String getRandomString() {
        final int maxLength = 10000;
        final int minLength = maxLength / 2;
        // Uniform in [minLength, maxLength): same distribution as re-rolling until >= minLength.
        int strLength = minLength + RANDOM.nextInt(maxLength - minLength);

        char[] chars = new char[strLength];
        for (int i = 0; i < chars.length; i++) {
            if (RANDOM.nextInt(3) > 0) {
                chars[i] = BASE_CHARS.charAt(RANDOM.nextInt(BASE_CHARS.length()));
            } else {
                chars[i] = (char) (CJK_FIRST + RANDOM.nextInt(CJK_LAST - CJK_FIRST + 1));
            }
        }
        return new String(chars);
    }

    /**
     * Runs the three truncation methods on 100 random strings, checks that they agree and
     * prints the accumulated time of each method in milliseconds, in the order
     * {@link #subString}, {@link #subStr}, {@link #subStringInChar}.
     *
     * <p>On the first disagreement the iteration index, the input, the byte budget and the
     * three results are printed and the run stops.
     *
     * @param args unused
     * @throws UnsupportedEncodingException if the default charset is not supported by the JVM
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        // Accumulate nanoseconds: single calls are far shorter than one millisecond, so
        // summing currentTimeMillis() deltas would mostly add zeros.
        long s1 = 0;
        long s2 = 0;
        long s3 = 0;
        for (int i = 0; i < 100; i++) {
            String str = getRandomString();
            if (str.length() == 0) {
                continue;
            }
            int randomLeng = RANDOM.nextInt(str.length());

            long start = System.nanoTime();
            String str1 = subString(str, randomLeng, null);
            s1 += System.nanoTime() - start;

            start = System.nanoTime();
            String str2 = subStr(str, randomLeng, null);
            s2 += System.nanoTime() - start;

            start = System.nanoTime();
            String str3 = subStringInChar(str, randomLeng, null);
            s3 += System.nanoTime() - start;

            if (!str1.equals(str2) || !str2.equals(str3)) {
                System.out.println(i);
                System.out.println(str);
                System.out.println(randomLeng);
                System.out.println(str1);
                System.out.println(str2);
                System.out.println(str3);
                break;
            }
        }
        System.out.println(s1 / 1000000L);
        System.out.println(s2 / 1000000L);
        System.out.println(s3 / 1000000L);
    }
}
容易发怒的意思就是: 别人做了蠢事, 然后我们代替他们, 表现出笨蛋的样子。—— 蔡康永




