汉字转换为拼音
用一种简单的方法将字符串转化为拼音:
- 将需要转换的字符串t1里的字符t1[i]按照t3的格式格式化为拼音,并复制给t2
- 如果t1[i]不是汉字,则不转换,直接把t1[i]复制给t2
- 将t2首字母大写,复制给t4
这里将用到pinyin4j.jar
包,请自行百度下载。
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
public class Chinese2Pinyin {
public static String getPinyin(String src) {
char[] t1 = null;
t1 = src.toCharArray();
String[] t2 = new String[t1.length];
HanyuPinyinOutputFormat t3 = new HanyuPinyinOutputFormat();
t3.setCaseType(HanyuPinyinCaseType.LOWERCASE);// 小写格式
t3.setToneType(HanyuPinyinToneType.WITHOUT_TONE);// 有无音标
t3.setVCharType(HanyuPinyinVCharType.WITH_V);
String t4 = "";
try {
for (int i = 0; i < t1.length; i++) {
// 判断是否为汉字字符
// if(t1[i] >= 32 && t1[i] <= 125)//ASCII码表范围内直接返回
if (String.valueOf(t1[i]).matches("[\\u4E00-\\u9FA5]+")) {
t2 = PinyinHelper.toHanyuPinyinStringArray(t1[i], t3);// 转化为拼音
t4 += t2[0].substring(0, 1).toUpperCase() + t2[0].substring(1);// 首字母大写
} else {
t4 += String.valueOf(t1[i]);// 不是汉字不处理
}
}
} catch (BadHanyuPinyinOutputFormatCombination e1) {
e1.printStackTrace();
}
return t4;
}
public static void main(String[] args) {
String s = getPinyin("西安电子科技大学");
System.out.println(s);
String s1 = getPinyin("西安");
System.out.println(s1);
String s2 = getPinyin("成都");
System.out.println(s2);// ChengDu
}
}
运行结果为:
XiAnDianZiKeJiDaXue
XiAn
ChengDou
可以看到,“成都(du)”
被转化成了“ChengDou”
,这是因为pinyin4j.jar
无法识别多音字的原因。如果是多音字
,t2[k]存放着某个字符t1[i]的多个拼音,是一个数组
,默认取t2[0]为该字的拼音。
多音字处理
这里采用查字典
的方法,将需要转换的字符子串与字典里的词组进行匹配,如果匹配到,这把字典里的读音作为改字的拼音。单字随机分配发音,如果是字典里的常用单字,则按照字典发音。
首先,下载多音字字典文件:下载字典
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
public class Chinese2Pinyin {
public static Map<String, String> dictionary = new HashMap<String, String>();
static String filePath = "C:\\dict\\duoyinzi_pinyin.txt";
// 加载多音字词典
static {
BufferedReader br = null;
try {
File file = new File(filePath);
br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
String line = null;
while ((line = br.readLine()) != null) {
String[] arr = line.split("#");
if (StringUtils.isNotEmpty(arr[1])) {
String[] sems = arr[1].split(" ");
for (String sem : sems) {
if (StringUtils.isNotEmpty(sem)) {
dictionary.put(sem, arr[0]);
}
}
}
}
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (br != null) {
try {
br.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
public static String getPinyin(String src) {
char[] t1 = null;
t1 = src.toCharArray();
String[] t2 = new String[t1.length];
HanyuPinyinOutputFormat t3 = new HanyuPinyinOutputFormat();
t3.setCaseType(HanyuPinyinCaseType.LOWERCASE);// 小写格式
t3.setToneType(HanyuPinyinToneType.WITHOUT_TONE);// 有无音标
t3.setVCharType(HanyuPinyinVCharType.WITH_V);
String t4 = "";
try {
for (int i = 0; i < t1.length; i++) {
// 判断是否为汉字字符
// if(t1[i] >= 32 && t1[i] <= 125)//ASCII码表范围内直接返回
if (String.valueOf(t1[i]).matches("[\\u4E00-\\u9FA5]+")) {
t2 = PinyinHelper.toHanyuPinyinStringArray(t1[i], t3);// 转化为拼音
//如果是单个汉字,不处理,随机分配拼音
if (i != t1.length - 1 && t1.length != 1) {
String dic = String.valueOf(t1[i]) + String.valueOf(t1[i + 1]);
for (String py : t2) {
if (py.equals(dictionary.get(dic)) || py.equals(dictionary.get(String.valueOf(t1[i])))) {
t2[0] = py;// 把t2[0]作为存放正确发音的区域
break;
}
}
} else if (t1.length != 1) {
String dic = String.valueOf(t1[i - 1]) + String.valueOf(t1[i]);
for (String py : t2) {
if (py.equals(dictionary.get(dic)) || py.equals(dictionary.get(String.valueOf(t1[i])))) {
t2[0] = py;// 把t2[0]作为存放正确发音的区域
break;
}
}
}
t4 += t2[0].substring(0, 1).toUpperCase() + t2[0].substring(1);// 首字母大写
} else {
t4 += String.valueOf(t1[i]);// 不是汉字不处理
}
}
} catch (BadHanyuPinyinOutputFormatCombination e1) {
e1.printStackTrace();
}
return t4;
}
public static void main(String[] args) {
String s = getPinyin("西安电子科技大学");
System.out.println(s);
String s1 = getPinyin("西安");
System.out.println(s1);
String s2 = getPinyin("成都");
System.out.println(s2);// ChengDu
}
}
运行结果为:
XiAnDianZiKeJiDaXue
XiAn
ChengDu