【拼音处理】

This commit is contained in:
weizhihong 2023-04-10 10:51:40 +08:00
parent 35b1287875
commit d3bb96d95e
2 changed files with 62 additions and 3 deletions

View File

@ -6,8 +6,11 @@ import club.joylink.rtss.simulation.cbtc.discriminate.ParamExtractResult;
import club.joylink.rtss.simulation.cbtc.discriminate.ParamExtractRule; import club.joylink.rtss.simulation.cbtc.discriminate.ParamExtractRule;
import club.joylink.rtss.simulation.cbtc.discriminate.VoiceDiscriminateResult; import club.joylink.rtss.simulation.cbtc.discriminate.VoiceDiscriminateResult;
import club.joylink.rtss.simulation.cbtc.discriminate.VoiceDiscriminateRule; import club.joylink.rtss.simulation.cbtc.discriminate.VoiceDiscriminateRule;
import club.joylink.rtss.util.PinYinUtil;
import club.joylink.rtss.vo.client.voice.VoiceRecognitionResult; import club.joylink.rtss.vo.client.voice.VoiceRecognitionResult;
import club.joylink.rtss.vo.client.voice.VoiceRecognitionVO; import club.joylink.rtss.vo.client.voice.VoiceRecognitionVO;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils; import org.springframework.util.CollectionUtils;
@ -66,7 +69,7 @@ public class VoiceParseServiceImpl implements VoiceParseService {
* @return 拼音信息 * @return 拼音信息
*/ */
private String translateToPinYin(String content) { private String translateToPinYin(String content) {
return null; return PinYinUtil.toPinYin(content);
} }
/** /**
@ -81,7 +84,7 @@ public class VoiceParseServiceImpl implements VoiceParseService {
// 匹配指令 // 匹配指令
List<String> matchGroupList = null; List<String> matchGroupList = null;
for (VoiceDiscriminateRule rule : ruleList) { for (VoiceDiscriminateRule rule : ruleList) {
matchGroupList = keyWordsMatch(rule.getKeyWordRules(), result.getOriginContent()); matchGroupList = keyWordsMatch(rule.getKeyWordRules(), result.getMatchOriginContent());
if (!CollectionUtils.isEmpty(matchGroupList)) { if (!CollectionUtils.isEmpty(matchGroupList)) {
result.setRule(rule); result.setRule(rule);
result.setMatchGroupList(matchGroupList); result.setMatchGroupList(matchGroupList);
@ -101,7 +104,7 @@ public class VoiceParseServiceImpl implements VoiceParseService {
*/ */
private List<String> keyWordsMatch(String patternStr, String content) { private List<String> keyWordsMatch(String patternStr, String content) {
List<String> groupList = new ArrayList<>(); List<String> groupList = new ArrayList<>();
Pattern pattern = Pattern.compile(patternStr);// 匹配的模式 Pattern pattern = Pattern.compile(translateToPinYin(patternStr));// 匹配的模式
Matcher matcher = pattern.matcher(content); Matcher matcher = pattern.matcher(content);
if (matcher.find()) { if (matcher.find()) {
for (int index = 1, size = matcher.groupCount(); index <= size; index++) { for (int index = 1, size = matcher.groupCount(); index <= size; index++) {

View File

@ -0,0 +1,56 @@
package club.joylink.rtss.util;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
/**
 * Utility class that converts Chinese characters (Hanzi) into Hanyu Pinyin.
 *
 * <p>The output is upper-case and carries no tone information, as configured
 * in the static initializer below. Characters outside the Hanzi range handled
 * here are copied to the output unchanged.
 */
public class PinYinUtil {
    /** First character of the range treated as Chinese (U+4E00). */
    private static final char HANZI_FIRST = '\u4E00';
    /** Last character of the range treated as Chinese (U+9FA5). */
    private static final char HANZI_LAST = '\u9FA5';

    /** Shared output format used for every conversion. */
    private static final HanyuPinyinOutputFormat PIN_YIN_FORMAT = new HanyuPinyinOutputFormat();

    static {
        // Case of the generated pinyin:
        //   LOWERCASE - lower-case output
        //   UPPERCASE - upper-case output
        PIN_YIN_FORMAT.setCaseType(HanyuPinyinCaseType.UPPERCASE);
        // Tone representation:
        //   WITH_TONE_MARK   - diacritic tone marks; requires WITH_U_UNICODE,
        //                      otherwise the conversion throws an exception
        //   WITH_TONE_NUMBER - tones written as the digits 1-4
        //   WITHOUT_TONE     - no tone information
        PIN_YIN_FORMAT.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    }

    /**
     * Converts every Chinese character in {@code content} to pinyin.
     *
     * <p>Each character in the range U+4E00..U+9FA5 is replaced by the first
     * reading reported by pinyin4j (for polyphonic characters only that first
     * reading is used). Every other character, and any Chinese character for
     * which pinyin4j reports no reading, is appended unchanged.
     *
     * @param content text to convert; may be {@code null} or empty
     * @return the converted text, or {@code content} itself when it is
     *         {@code null} or empty
     * @throws IllegalStateException if the shared output format is rejected by
     *         pinyin4j as an invalid combination (a configuration error)
     */
    public static String toPinYin(String content) {
        if (content == null || content.isEmpty()) {
            return content;
        }
        StringBuilder converted = new StringBuilder(content.length() * 2);
        for (int i = 0; i < content.length(); i++) {
            char current = content.charAt(i);
            if (!isHanzi(current)) {
                converted.append(current);
                continue;
            }
            String[] readings;
            try {
                readings = PinyinHelper.toHanyuPinyinStringArray(current, PIN_YIN_FORMAT);
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                // The format is a static constant, so this would fail for every
                // character; surface it instead of silently dropping text.
                throw new IllegalStateException("Invalid pinyin output format configuration", e);
            }
            if (readings == null || readings.length == 0) {
                // No reading available for this character: keep it as-is rather
                // than failing on readings[0].
                converted.append(current);
            } else {
                converted.append(readings[0]);
            }
        }
        return converted.toString();
    }

    /** Returns whether {@code c} lies in the Hanzi range U+4E00..U+9FA5. */
    private static boolean isHanzi(char c) {
        return c >= HANZI_FIRST && c <= HANZI_LAST;
    }
}