package com.ch.jedge.jbot.dict;

import com.changhong.qlib.util.StringUtils;
import com.changhong.qlib.util.file.FileUtils;

import java.io.File;
import java.util.List;
import java.util.Map;

/**
 * Dictionary utilities.
 *
 * <p>Scans a dictionary directory for common dictionary files ({@code *.dic}) and loads them
 * so they can be added dynamically to the Jieba segmentation dictionary.
 *
 * <p>Dictionary file format, one entry per line:
 * <ul>
 *   <li>blank lines and lines starting with {@code #} are ignored;</li>
 *   <li>{@code $<freq> <type>} changes the default frequency/type applied to the
 *       following entries that omit them;</li>
 *   <li>{@code <word> [<freq> [<type>]]} defines a word; missing fields fall back to the
 *       currently active defaults (initially {@code 100} and {@code nz}).</li>
 * </ul>
 */
public class JEDictUtil {

    /** Frequency used for entries that omit it, until a {@code $} directive overrides it. */
    private static final String DEFAULT_FREQ = "100";

    /** Word type used for entries that omit it, until a {@code $} directive overrides it. */
    private static final String DEFAULT_TYPE = "nz";

    /**
     * Loads every dictionary file found under {@code dictPath} and registers each non-empty
     * dictionary in {@code dictMap}, keyed by {@code dict.getName()}.
     *
     * <p>Writes to {@code dictMap} are guarded by synchronizing on the map itself.
     *
     * @param dictMap  destination map; receives one entry per loaded dictionary
     * @param dictPath directory to scan, passed to {@code FileUtils.listDirFiles}
     */
    // NOTE(review): the map parameter is a raw type in this source; it was presumably
    // declared as Map<String, JedgeWordDict> -- confirm against callers before tightening.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static void loadStaticCommonDicts(final Map dictMap, String dictPath) {
        FileUtils.listDirFiles(dictPath, file -> {
            JedgeWordDict dict = loadStaticCommonDict(dictPath, file);
            if (dict != null) {
                synchronized (dictMap) {
                    dictMap.put(dict.getName(), dict);
                }
            }
            return null;
        });
    }

    /**
     * Parses a single {@code .dic} file into a {@link JedgeWordDict}.
     *
     * @param dictPath directory the file was found in (currently unused by the parser)
     * @param file     candidate dictionary file
     * @return the populated dictionary, constructed with the file's absolute path, or
     *         {@code null} when the file is {@code null}, does not end with {@code .dic},
     *         does not exist, cannot be read into lines, or yields no word entries
     */
    public static JedgeWordDict loadStaticCommonDict(String dictPath, File file) {
        if (file == null || !file.getName().endsWith(".dic") || !file.exists()) {
            return null;
        }
        List<String> lines = StringUtils.readStringLinesFromFile(file);
        if (lines == null) {
            // Defensive: treat an unreadable file like an empty one instead of throwing.
            return null;
        }

        JedgeWordDict dict = new JedgeWordDict(file.getAbsolutePath());
        String defaultFreq = DEFAULT_FREQ;
        String defaultType = DEFAULT_TYPE;

        for (String raw : lines) {
            if (StringUtils.isNotValidStr(raw)) {
                continue;
            }
            String line = raw.trim();
            if (line.isEmpty() || line.charAt(0) == '#') {
                continue;
            }

            if (line.charAt(0) == '$') {
                // Default directive. A field that is missing keeps its previous value, so a
                // short directive such as "$200" can no longer cause an out-of-bounds access
                // when a later entry needs the default type.
                String[] defaults = splitFields(line.substring(1));
                if (defaults.length > 0) {
                    defaultFreq = defaults[0];
                }
                if (defaults.length > 1) {
                    defaultType = defaults[1];
                }
                continue;
            }

            // Add the word entry: <word> [<freq> [<type>]]; fields beyond the third are ignored.
            String[] fields = splitFields(line);
            if (fields.length == 0) {
                continue;
            }
            String word = fields[0];
            String freq = fields.length > 1 ? fields[1] : defaultFreq;
            String type = fields.length > 2 ? fields[2] : defaultType;
            if (StringUtils.isNotValidStr(word)) {
                continue;
            }
            dict.appendWordItem(new JedgeWordItem(word, type, freq, false));
        }

        return dict.isEmpty() ? null : dict;
    }

    /**
     * Splits {@code text} on runs of whitespace (spaces or tabs), never producing empty tokens.
     */
    private static String[] splitFields(String text) {
        String trimmed = text.trim();
        return trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+");
    }
}