} } }

    关于操作字符串的简单讨论。

    添加时间:2013-7-9 点击量:

    起因是之前的阿里面试:按照面试官的提示,重写了三遍方法才写对。教条?不过我对此有不同的见解。



    如下代码给出了三种不同的截取方式,前面两种都比面试官所说的方法快,而且速度不在同一个量级。


    而以字节操作又比以字符操作略慢,其原因在于以字节操作最后需要 new String,从源码可知这会导致重新按字符集解码。


    由此可知,要快速地操作字符串,最根本的手段是以 Java 字符(char)形式进行操作,其次是字节形式;在字节与字符之间来回转换是性能最差的方式。




    import java.io.UnsupportedEncodingException;
    
    import java.util.Random;

    public class Example {

    /

    <pre>
    以字符字节的情势操纵字节截取。
    </pre>

    @param src
    @param length
    @param charset
    @return
    @throws UnsupportedEncodingException
    /
    public static String subStringInChar(String src, int length, String charset) throws UnsupportedEncodingException {
    int srcLength = src.length();

    int i = 0;
    int len = 0;
    int rollback = 0;
    whiletrue) {
    if (len >= length || length>=srcLength) {
    break;
    }

    if (src.charAt(i) < 256) {
    rollback
    =0;
    len
    += 1;
    }
    else {
    rollback
    =1;
    len
    += 2;
    }
    i
    +=1;
    }

    if (len!=length){
    i
    -=rollback;
    }

    return src.substring(0, i);
    }

    /

    <pre>
    逐字节截取字符。
    </pre>

    @param src
    @param length
    @param charset
    @return
    @throws UnsupportedEncodingException
    /
    public static String subString(String src, int length, String charset) throws UnsupportedEncodingException {
    if (charset == null) {
    charset
    = GBK;
    }

    int firstStartScope = 129 - 1;
    int firstEndScope = 254 + 1;

    int secondStartScope = 64 - 1;
    int secondEndScope = 254 + 1;

    byte[] bytes = src.getBytes(charset);
    int i = 0;
    for (; i < bytes.length;) {
    int b1 = bytes[i] & 0 xFF;
    int b2 = bytes[i+1] & 0 xFF;

    if (b1 > firstStartScope && b1 < firstEndScope && b2 > secondStartScope && b2 < secondEndScope) {

    if (i+1==length){
    i
    =length-1;
    break;
    }
    else if (i+1>length){
    i
    =length;
    break;
    }

    i
    +=2;
    }
    else{
    i
    +=1;

    if (i==length){
    break;
    }
    }
    }

    return new String(bytes, 0, i, charset);
    }

    /

    <pre>
    口试官承认的操纵办法
    </pre>

    @param str
    @param subSLength
    @param charset
    @return
    @throws UnsupportedEncodingException
    /
    public static String subStr(String str, int subSLength, String charset) throws UnsupportedEncodingException {
    if (charset == null) {
    charset
    = GBK;
    }

    if (str == null) {
    return ;
    }
    else {
    int tempSubLength = subSLength;//截取字节数
    int strLength = str.length();
    String subStr
    = str.substring(0, strLength < subSLength ? strLength : subSLength);//截取的子串
    int subStrByetsL = subStr.getBytes(charset).length;//截取子串的字节长度
    while (subStrByetsL > tempSubLength) {
    int subSLengthTemp = --subSLength;
    subStr
    = str.substring(0, subSLengthTemp > strLength ? strLength : subSLengthTemp);
    subStrByetsL
    = subStr.getBytes(charset).length;
    //subStrByetsL = subStr.getBytes().length;
    }
    return subStr;
    }
    }

    private static Random random = new Random();

    public static String getRandomString() {
    String base
    = abcdefghijklmnopqrstuvwxyz0123456789;
    int randomLength = 10000;
    int strLength = random.nextInt(randomLength);
    while (strLength < randomLength / 2) {
    strLength
    = random.nextInt(randomLength);
    }
    char[] chars = new char[strLength];
    forint i = 0; i < chars.length; i++) {
    char c = (char) (0 x4e00 + (int) (Math.random() (0 x9fa5 - 0 x4e00 + 1)));
    if (random.nextInt(3) > 0) {
    c
    = base.charAt(random.nextInt(base.length()));
    }
    chars[i]
    = c;
    }

    return new String(chars);
    }

    public static void main(String[] args) throws UnsupportedEncodingException {
    long start = System.currentTimeMillis();
    long s1 = 0;
    long s2 = 0;
    long s3 = 0;
    Random random
    = new Random();
    forint i = 0; i < 100; i++) {
    String str
    = getRandomString();
    // String str = 我ABC汗;
    // System.out.println(str);
    if (str.length() == 0) {
    continue;
    }
    start
    = System.currentTimeMillis();
    int randomLeng = random.nextInt(str.length());
    // int randomLeng = 6;

    String str1
    = subString(str, randomLeng, null);
    // System.out.println(str1);
    s1 += (System.currentTimeMillis() - start);

    start
    = System.currentTimeMillis();
    String str2
    = subStr(str, randomLeng, null);
    // System.out.println(str2);
    s2 += (System.currentTimeMillis() - start);

    start
    = System.currentTimeMillis();
    String str3
    = subStringInChar(str, randomLeng, null);
    // System.out.println(str3);
    s3 += (System.currentTimeMillis() - start);

    if (!str1.equals(str2) || !str2.equals(str3) || !str1.equals(str3)) {
    System.out.println(i);
    System.out.println(str);
    System.out.println(randomLeng);
    System.out.println(str1);
    System.out.println(str2);
    System.out.println(str3);
    break;
    }
    }

    System.out.println(s1);
    System.out.println(s2);
    System.out.println(s3);

    }

    }


    容易发怒的意思就是: 别人做了蠢事, 然后我们代替他们, 表现出笨蛋的样子。—— 蔡康永
    分享到: