1. mysql

在使用mysql时候,某些字段会存储中文字符,或是包含中文字符的串,查询出来的方法是:

SELECT col FROM table WHERE length(col) != char_length(col)

当字符集为UTF-8,并且字符为中文时,length() 和 char_length() 两个方法返回的结果是不相同的。

  1. Java

Java

/**
 * Reports whether a single UTF-16 char lies in one of the Unicode blocks that
 * this helper treats as "Chinese".
 *
 * <p>Accepted blocks:
 * <ul>
 *   <li>CJK_UNIFIED_IDEOGRAPHS, CJK_COMPATIBILITY_IDEOGRAPHS and
 *       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A - the ideographs themselves;</li>
 *   <li>GENERAL_PUNCTUATION - included so the Chinese quotation mark is matched;</li>
 *   <li>CJK_SYMBOLS_AND_PUNCTUATION - included so the Chinese full stop is matched;</li>
 *   <li>HALFWIDTH_AND_FULLWIDTH_FORMS - included so the Chinese comma is matched.</li>
 * </ul>
 *
 * @param c the char to classify
 * @return {@code true} if the block of {@code c} is one of those listed above,
 *         {@code false} otherwise
 */
private static final boolean isChinese(char c) {
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(c);

    // Blocks that hold the ideographs.
    final boolean ideograph =
            block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A;

    // Blocks that hold the punctuation marks used in Chinese text.
    final boolean punctuation =
            block == Character.UnicodeBlock.GENERAL_PUNCTUATION
            || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
            || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;

    return ideograph || punctuation;
}
  
    /**
     * Reports whether the given string contains at least one char accepted by
     * {@link #isChinese(char)}.
     *
     * <p>The string is examined one UTF-16 char at a time and scanning stops at
     * the first match. An empty string yields {@code false}.
     *
     * @param strName the text to inspect
     * @return {@code true} if any char of {@code strName} is classified as
     *         Chinese, {@code false} otherwise
     */
    public static final boolean isChinese(String strName) {
        for (char candidate : strName.toCharArray()) {
            if (isChinese(candidate)) {
                return true;
            }
        }
        return false;
    }


 2.   kotlin
/**
 * Reports whether [c] lies in one of the Unicode blocks that this helper treats as "Chinese".
 *
 * Besides the three ideograph blocks, three punctuation blocks are accepted:
 * - `GENERAL_PUNCTUATION`, which is part of the accepted set checked below;
 * - `CJK_SYMBOLS_AND_PUNCTUATION`, included so the Chinese full stop is matched;
 * - `HALFWIDTH_AND_FULLWIDTH_FORMS`, included so the Chinese comma is matched.
 *
 * @param c the character to classify
 * @return `true` if the block of [c] is one of the accepted blocks, `false` otherwise
 */
private fun isChinese(c: Char): Boolean =
    when (UnicodeBlock.of(c)) {
        UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
        UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
        UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
        UnicodeBlock.GENERAL_PUNCTUATION,
        UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
        UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS -> true
        else -> false
    }

/**
 * Reports whether [strName] contains at least one character accepted by the
 * single-character `isChinese` overload.
 *
 * The string is examined one UTF-16 `Char` at a time and scanning stops at the first match.
 * An empty string yields `false`.
 *
 * @param strName the text to inspect
 * @return `true` if any character of [strName] is classified as Chinese, `false` otherwise
 */
fun isChinese(strName: String): Boolean = strName.any { isChinese(it) }