搞定多音字!汉字转拼音的神器
最编程
2024-01-29 19:20:05
...
缘由:根据姓名创建账号,存在生僻字以及多音字
做法:参考网上的资料,自己写了一个汉字转拼音的工具类,词库可能不完整(有待补充)
其他不多说了,代码如下:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using System.Web;

/// <summary>
/// Converts Chinese characters to toneless pinyin by looking them up in the
/// NotOneArray table declared further down in this class.
/// </summary>
public static class PinyinHelper
{
    /// <summary>
    /// Converts a string to pinyin. Characters that are not recognised as Chinese,
    /// or that are missing from the lookup table, are copied to the output unchanged.
    /// </summary>
    /// <param name="str">Text to convert. Null or empty yields an empty array.</param>
    /// <param name="polyphone">
    /// When true, every reading found for each character is used and one result is
    /// produced per combination of readings (the result count is the product of the
    /// per-character reading counts). When false, only the first reading found in the
    /// table is used, so at most one result is returned.
    /// </param>
    /// <returns>All pinyin spellings of <paramref name="str"/>.</returns>
    public static string[] GetPinyin(string str, bool polyphone)
    {
        if (string.IsNullOrEmpty(str))
        {
            return new string[0];
        }

        // Keep the readings of each character as a separate array instead of joining
        // them with ' ' and splitting again later: with the join/split round trip a
        // literal space in the input was mistaken for a reading separator, which
        // dropped the space and duplicated every result.
        var alternativesPerChar = new List<string[]>(str.Length);
        foreach (var strChar in GetStrArray(str))
        {
            string[] readings = CheckChineseReg(strChar)
                ? GetPinyinByOne(strChar, polyphone)
                : new string[0];

            // No reading available: fall back to the original character.
            alternativesPerChar.Add(readings.Length > 0 ? readings : new[] { strChar });
        }

        return CombineAlternatives(alternativesPerChar);
    }

    /// <summary>
    /// Splits a string into one-character strings (one element per UTF-16 code unit).
    /// </summary>
    /// <param name="str">String to split. Null yields an empty array.</param>
    /// <returns>An array in which every element is a single character of the input.</returns>
    public static string[] GetStrArray(string str)
    {
        if (str == null)
        {
            return new string[0];
        }

        return str.Select(x => x.ToString()).ToArray();
    }

    /// <summary>
    /// Uses a regular expression to test for Chinese characters.
    /// </summary>
    /// <param name="text">Character or string to test. Null yields false.</param>
    /// <returns>
    /// True when <paramref name="text"/> ends with at least one character in the range
    /// U+4E00..U+9FBB (the pattern is anchored at the end only); otherwise false.
    /// </returns>
    public static bool CheckChineseReg(string text)
    {
        return text != null && Regex.IsMatch(text, @"[\u4e00-\u9fbb]+$");
    }

    /// <summary>
    /// Expands space-separated alternatives into every combination, e.g.
    /// ['chang zhang', 'cheng'] becomes ['changcheng', 'zhangcheng'].
    /// </summary>
    /// <param name="array">
    /// One element per position; alternatives inside an element are separated by a
    /// single space. Null yields an empty array.
    /// </param>
    /// <returns>The concatenation of every combination of alternatives.</returns>
    public static string[] handlePolyphone(string[] array)
    {
        if (array == null)
        {
            return new string[0];
        }

        var alternativesPerItem = new List<string[]>(array.Length);
        foreach (var item in array)
        {
            alternativesPerItem.Add(item.Split(new char[] { ' ' }));
        }

        return CombineAlternatives(alternativesPerItem);
    }

    /// <summary>
    /// Builds every concatenation that picks one alternative per position.
    /// For each position the alternatives are iterated in the outer loop and the
    /// prefixes built so far in the inner loop, which fixes the order of the results.
    /// </summary>
    private static string[] CombineAlternatives(IList<string[]> alternativesPerPosition)
    {
        var combined = new List<string>();
        foreach (var alternatives in alternativesPerPosition)
        {
            var next = new List<string>(Math.Max(combined.Count, 1) * alternatives.Length);
            foreach (var alternative in alternatives)
            {
                if (combined.Count > 0)
                {
                    foreach (var prefix in combined)
                    {
                        next.Add(prefix + alternative);
                    }
                }
                else
                {
                    // Nothing accumulated yet: the alternatives seed the result list.
                    next.Add(alternative);
                }
            }

            combined = next;
        }

        return combined.ToArray();
    }

    /// <summary>
    /// Looks up the pinyin of a single Chinese character in the lookup table.
    /// </summary>
    /// <param name="str">
    /// A string holding exactly one character. Null, empty or longer strings never
    /// match a table entry and yield an empty array.
    /// </param>
    /// <param name="polyphone">
    /// When true, the reading of every table row containing the character is returned;
    /// when false, only the reading of the first matching row is returned.
    /// </param>
    /// <returns>The readings found, in table order; empty when the character is not in the table.</returns>
    public static string[] GetPinyinByOne(string str, bool polyphone)
    {
        var readings = new List<string>();
        if (str == null || str.Length != 1)
        {
            return readings.ToArray();
        }

        char target = str[0];
        for (int i = 0; i < NotOneArray.GetLength(0); i++)
        {
            // Column 1 holds the characters of the row, column 0 their shared reading.
            if (NotOneArray[i, 1].IndexOf(target) < 0)
            {
                continue;
            }

            readings.Add(NotOneArray[i, 0]);
            if (!polyphone)
            {
                break;
            }
        }

        return readings.ToArray();
    }

    /// <summary>
    /// Returns a new array made of the given array followed by one extra item.
    /// </summary>
    /// <param name="array">Original array (left unmodified). Null is treated as empty.</param>
    /// <param name="item">Value to append.</param>
    /// <returns>A new array one element longer than <paramref name="array"/>.</returns>
    public static string[] ArrayAdd(string[] array, string item)
    {
        if (array == null)
        {
            return new[] { item };
        }

        var extended = new string[array.Length + 1];
        Array.Copy(array, extended, array.Length);
        extended[array.Length] = item;
        return extended;
    }

    /*
    Author's note: covers 6763 commonly used Chinese characters; tones are not supported,
    polyphonic characters are supported, and entries are sorted by character usage
    frequency from low to high. Some rare characters were added (rare character, reading):
    犇 ben1 猋 biao1 骉 biao1 蟲 chong2 麤 cu1 掱 pa2 垚 yao2 烜 hui3 xuan3 煐 ying1 烓 wei1
    焺 sheng2 燚 yi4 焜 kun1 珅 shen1 璟 jing3 琀 han2 珄 sheng1 瑢 rong2 瑱 tian4 琤 cheng1
    玽 gou3 玭 pi2 玚 chang4 yang2 媖 ying1 媭 xu1 嫚 man4 婻 nan4 嬛 huan2 婋 xiao1 翀 chong1
    翙 hui4 翯 he4 珝 xu3 翾 xuan1 昫 xu4 昉 fang3 晞 xi1 昍 xuan1 晢 zhe2 旸 yang2
    暔 nan2 暎 ying4 晹 yi4 昇 sheng4 甠 qing2 暒 qing2 凊 qing4 浛 han2 湜 shi2 汧 qian1
    沄 yun2 湦 sheng1 沕 mi4 wu4 陹 sheng1 竔 sheng1 琞 sheng4 祎 yi1 慤 que4 屾 shen1 奡 ao4
    劼 jie2 弢 tao1 锳 ying1 骎 qin1 寗 ning2 郬 qing1 虓 xiao1 甦 su1 鹍 kun1 靘 qing4
    飏 yang2 誩 jing4 臸 zhi4 競 jing4 喆 zhe2 棽 chen1 燊 shen1 顕 xian3 峣 yao2 堃 kun1
    氹 dang4 冇 miao3 皛 xiao3 惢 suo3 畾 lei3 羴 shan1 鱻 xian1 赑 bi4 劦 lie4 xie2 聶 nie4
    轟 hong1 龘 da2 厵 yuan2 靐 bing4 飝 fei1 刕 li2 叒 ruo4 壵 zhuang4 尛 mo2 孨 zhuan3
    歮 se4 飍 xiu1 雥 za2 嚞 zhe2 譶 ta4 舙 hua4 馫 xin1 灥 xun2 嚄 huo4 o3 唞 dou2
    奀 en1 啱 yan2 踎 mou2 喐 huo4 嚡 xie2 啩 gua4 嚿 huo4 啋 xiao1 cai3 惗 nian4 妳 ni3 nai3
    吤 jie4 ge4 囖 luo1 2 嚟 li2 嫐 nao3 瞓 fen4 潎 pie1 pi4 嗻 zhe1 嘥 sai1 嘢 ye3
    嗰 ge3 嗮 sai4 嘅 kai3 揾 wen4 唸 nian4 啰 luo 啫 zhe3 掟 zheng3 ding4 唓
che1 係 xi4 乸 na3 咗 zuo 咁 xian2 抦 bing3 佢 qu2 攰 gui4 甴 you2 曱 yue1 */ static string[,] NotOneArray = new string[,] { { "a", "阿啊呵腌嗄吖锕" }, { "e", "额阿俄恶鹅遏鄂厄饿峨扼娥鳄哦蛾噩愕讹锷垩婀鹗萼谔莪腭锇颚呃阏屙苊轭" }, { "ai", "爱埃艾碍癌哀挨矮隘蔼唉皑哎霭捱暧嫒嗳瑷嗌锿砹" }, { "ei", "诶" }, { "xi", "系西席息希习吸喜细析戏洗悉锡溪惜稀袭夕洒晰昔牺腊烯熙媳栖膝隙犀蹊硒兮熄曦禧嬉玺奚汐徙羲铣淅嘻歙熹矽蟋郗唏皙隰樨浠忾蜥檄郄翕阋鳃舾屣葸螅咭粞觋欷僖醯鼷裼穸饩舄禊诶菥蓰晞" }, { "yi", "一以已意议义益亿易医艺食依移衣异伊仪宜射遗疑毅谊亦疫役忆抑尾乙译翼蛇溢椅沂泄逸蚁夷邑怡绎彝裔姨熠贻矣屹颐倚诣胰奕翌疙弈轶蛾驿壹猗臆弋铱旖漪迤佚翊诒怿痍懿饴峄揖眙镒仡黟肄咿翳挹缢呓刈咦嶷羿钇殪荑薏蜴镱噫癔苡悒嗌瘗衤佾埸圯舣酏劓燚晹祎係" }, { "an", "安案按岸暗鞍氨俺胺铵谙庵黯鹌桉埯犴揞厂广" }, { "han", "厂汉韩含旱寒汗涵函喊憾罕焊翰邯撼瀚憨捍酣悍鼾邗颔蚶晗菡旰顸犴焓撖琀浛" }, { "ang", "昂仰盎肮" }, { "ao", "奥澳傲熬凹鳌敖遨鏖袄坳翱嗷拗懊岙螯骜獒鏊艹媪廒聱奡" }, { "wa", "瓦挖娃洼袜蛙凹哇佤娲呙腽" }, { "yu", "于与育余预域予遇奥语誉玉鱼雨渔裕愈娱欲吁舆宇羽逾豫郁寓吾狱喻御浴愉禹俞邪榆愚渝尉淤虞屿峪粥驭瑜禺毓钰隅芋熨瘀迂煜昱汩於臾盂聿竽萸妪腴圄谕觎揄龉谀俣馀庾妤瘐鬻欤鹬阈嵛雩鹆圉蜮伛纡窬窳饫蓣狳肀舁蝓燠" }, { "niu", "牛纽扭钮拗妞忸狃" }, { "o", "哦噢喔嚄" }, { "ba", "把八巴拔伯吧坝爸霸罢芭跋扒叭靶疤笆耙鲅粑岜灞钯捌菝魃茇" }, { "pa", "怕帕爬扒趴琶啪葩耙杷钯筢掱" }, { "pi", "被批副否皮坏辟啤匹披疲罢僻毗坯脾譬劈媲屁琵邳裨痞癖陂丕枇噼霹吡纰砒铍淠郫埤濞睥芘蚍圮鼙罴蜱疋貔仳庀擗甓陴玭潎" }, { "bi", "比必币笔毕秘避闭佛辟壁弊彼逼碧鼻臂蔽拂泌璧庇痹毙弼匕鄙陛裨贲敝蓖吡篦纰俾铋毖筚荸薜婢哔跸濞秕荜愎睥妣芘箅髀畀滗狴萆嬖襞舭赑" }, { "bai", "百白败摆伯拜柏佰掰呗擘捭稗" }, { "bo", "波博播勃拨薄佛伯玻搏柏泊舶剥渤卜驳簿脖膊簸菠礴箔铂亳钵帛擘饽跛钹趵檗啵鹁擗踣" }, { "bei", "北被备倍背杯勃贝辈悲碑臂卑悖惫蓓陂钡狈呗焙碚褙庳鞴孛鹎邶鐾" }, { "ban", "办版半班般板颁伴搬斑扮拌扳瓣坂阪绊钣瘢舨癍" }, { "pan", "判盘番潘攀盼拚畔胖叛拌蹒磐爿蟠泮袢襻丬" }, { "bin", "份宾频滨斌彬濒殡缤鬓槟摈膑玢镔豳髌傧" }, { "bang", "帮邦彭旁榜棒膀镑绑傍磅蚌谤梆浜蒡" }, { "pang", "旁庞乓磅螃彷滂逄耪" }, { "beng", "泵崩蚌蹦迸绷甭嘣甏堋" }, { "bao", "报保包宝暴胞薄爆炮饱抱堡剥鲍曝葆瀑豹刨褒雹孢苞煲褓趵鸨龅勹" }, { "bu", "不部步布补捕堡埔卜埠簿哺怖钚卟瓿逋晡醭钸" }, { "pu", "普暴铺浦朴堡葡谱埔扑仆蒲曝瀑溥莆圃璞濮菩蹼匍噗氆攵镨攴镤" }, { "mian", "面棉免绵缅勉眠冕娩腼渑湎沔黾宀眄" }, { "po", "破繁坡迫颇朴泊婆泼魄粕鄱珀陂叵笸泺皤钋钷" }, { "fan", "反范犯繁饭泛翻凡返番贩烦拚帆樊藩矾梵蕃钒幡畈蘩蹯燔" }, { "fu", "府服副负富复福夫妇幅付扶父符附腐赴佛浮覆辅傅伏抚赋辐腹弗肤阜袱缚甫氟斧孚敷俯拂俘咐腑孵芙涪釜脯茯馥宓绂讣呋罘麸蝠匐芾蜉跗凫滏蝮驸绋蚨砩桴赙菔呒趺苻拊阝鲋怫稃郛莩幞祓艴黻黼鳆" }, { "ben", "本体奔苯笨夯贲锛畚坌犇" }, { "feng", "风丰封峰奉凤锋冯逢缝蜂枫疯讽烽俸沣酆砜葑唪" }, { "bian", "变便边编遍辩鞭辨贬匾扁卞汴辫砭苄蝙鳊弁窆笾煸褊碥忭缏" }, { "pian", "便片篇偏骗翩扁骈胼蹁谝犏缏" }, { "zhen", "镇真针圳振震珍阵诊填侦臻贞枕桢赈祯帧甄斟缜箴疹砧榛鸩轸稹溱蓁胗椹朕畛浈" }, { "biao", "表标彪镖裱飚膘飙镳婊骠飑杓髟鳔灬瘭猋骉" }, { "piao", "票朴漂飘嫖瓢剽缥殍瞟骠嘌莩螵" }, { "huo", "和活或货获火伙惑霍祸豁嚯藿锪蠖钬耠镬夥灬劐攉嚄喐嚿" }, { "bie", "别鳖憋瘪蹩" }, { "min", "民敏闽闵皿泯岷悯珉抿黾缗玟愍苠鳘" }, { "fen", "分份纷奋粉氛芬愤粪坟汾焚酚吩忿棼玢鼢瀵偾鲼瞓" 
}, { "bing", "并病兵冰屏饼炳秉丙摒柄槟禀枋邴冫靐抦" }, { "geng", "更耕颈庚耿梗埂羹哽赓绠鲠" }, { "fang", "方放房防访纺芳仿坊妨肪邡舫彷枋鲂匚钫昉" }, { "xian", "现先县见线限显险献鲜洗宪纤陷闲贤仙衔掀咸嫌掺羡弦腺痫娴舷馅酰铣冼涎暹籼锨苋蚬跹岘藓燹鹇氙莶霰跣猃彡祆筅顕鱻咁" }, { "fou", "不否缶" }, { "ca", "拆擦嚓礤" }, { "cha", "查察差茶插叉刹茬楂岔诧碴嚓喳姹杈汊衩搽槎镲苴檫馇锸猹" }, { "cai", "才采财材菜彩裁蔡猜踩睬啋" }, { "can", "参残餐灿惨蚕掺璨惭粲孱骖黪" }, { "shen", "信深参身神什审申甚沈伸慎渗肾绅莘呻婶娠砷蜃哂椹葚吲糁渖诜谂矧胂珅屾燊" }, { "cen", "参岑涔" }, { "san", "三参散伞叁糁馓毵" }, { "cang", "藏仓苍沧舱臧伧" }, { "zang", "藏脏葬赃臧奘驵" }, { "chen", "称陈沈沉晨琛臣尘辰衬趁忱郴宸谌碜嗔抻榇伧谶龀肜棽" }, { "cao", "草操曹槽糙嘈漕螬艚屮" }, { "ce", "策测册侧厕栅恻" }, { "ze", "责则泽择侧咋啧仄箦赜笮舴昃迮帻" }, { "zhai", "债择齐宅寨侧摘窄斋祭翟砦瘵哜" }, { "dao", "到道导岛倒刀盗稻蹈悼捣叨祷焘氘纛刂帱忉" }, { "ceng", "层曾蹭噌" }, { "zha", "查扎炸诈闸渣咋乍榨楂札栅眨咤柞喳喋铡蚱吒怍砟揸痄哳齄" }, { "chai", "差拆柴钗豺侪虿瘥" }, { "ci", "次此差词辞刺瓷磁兹慈茨赐祠伺雌疵鹚糍呲粢" }, { "zi", "资自子字齐咨滋仔姿紫兹孜淄籽梓鲻渍姊吱秭恣甾孳訾滓锱辎趑龇赀眦缁呲笫谘嵫髭茈粢觜耔" }, { "cuo", "措错磋挫搓撮蹉锉厝嵯痤矬瘥脞鹾" }, { "chan", "产单阐崭缠掺禅颤铲蝉搀潺蟾馋忏婵孱觇廛谄谗澶骣羼躔蒇冁" }, { "shan", "山单善陕闪衫擅汕扇掺珊禅删膳缮赡鄯栅煽姗跚鳝嬗潸讪舢苫疝掸膻钐剡蟮芟埏彡骟羴" }, { "zhan", "展战占站崭粘湛沾瞻颤詹斩盏辗绽毡栈蘸旃谵搌" }, { "xin", "新心信辛欣薪馨鑫芯锌忻莘昕衅歆囟忄镡馫" }, { "lian", "联连练廉炼脸莲恋链帘怜涟敛琏镰濂楝鲢殓潋裢裣臁奁莶蠊蔹" }, { "chang", "场长厂常偿昌唱畅倡尝肠敞倘猖娼淌裳徜昶怅嫦菖鲳阊伥苌氅惝鬯玚" }, { "zhang", "长张章障涨掌帐胀彰丈仗漳樟账杖璋嶂仉瘴蟑獐幛鄣嫜" }, { "chao", "超朝潮炒钞抄巢吵剿绰嘲晁焯耖怊" }, { "zhao", "着照招找召朝赵兆昭肇罩钊沼嘲爪诏濯啁棹笊" }, { "zhou", "调州周洲舟骤轴昼宙粥皱肘咒帚胄绉纣妯啁诌繇碡籀酎荮" }, { "che", "车彻撤尺扯澈掣坼砗屮唓" }, { "ju", "车局据具举且居剧巨聚渠距句拒俱柜菊拘炬桔惧矩鞠驹锯踞咀瞿枸掬沮莒橘飓疽钜趄踽遽琚龃椐苣裾榘狙倨榉苴讵雎锔窭鞫犋屦醵" }, { "cheng", "成程城承称盛抢乘诚呈净惩撑澄秤橙骋逞瞠丞晟铛埕塍蛏柽铖酲裎枨琤" }, { "rong", "容荣融绒溶蓉熔戎榕茸冗嵘肜狨蝾瑢" }, { "sheng", "生声升胜盛乘圣剩牲甸省绳笙甥嵊晟渑眚焺珄昇湦陹竔琞" }, { "deng", "等登邓灯澄凳瞪蹬噔磴嶝镫簦戥" }, { "zhi", "制之治质职只志至指织支值知识直致执置止植纸拓智殖秩旨址滞氏枝芝脂帜汁肢挚稚酯掷峙炙栉侄芷窒咫吱趾痔蜘郅桎雉祉郦陟痣蛭帙枳踯徵胝栀贽祗豸鸷摭轵卮轾彘觯絷跖埴夂黹忮骘膣踬臸" }, { "zheng", "政正证争整征郑丁症挣蒸睁铮筝拯峥怔诤狰徵钲掟" }, { "tang", "堂唐糖汤塘躺趟倘棠烫淌膛搪镗傥螳溏帑羰樘醣螗耥铴瑭" }, { "chi", "持吃池迟赤驰尺斥齿翅匙痴耻炽侈弛叱啻坻眙嗤墀哧茌豉敕笞饬踟蚩柢媸魑篪褫彳鸱螭瘛眵傺" }, { "shi", "是时实事市十使世施式势视识师史示石食始士失适试什泽室似诗饰殖释驶氏硕逝湿蚀狮誓拾尸匙仕柿矢峙侍噬嗜栅拭嘘屎恃轼虱耆舐莳铈谥炻豕鲥饣螫酾筮埘弑礻蓍鲺贳湜" }, { "qi", "企其起期气七器汽奇齐启旗棋妻弃揭枝歧欺骑契迄亟漆戚岂稽岐琦栖缉琪泣乞砌祁崎绮祺祈凄淇杞脐麒圻憩芪伎俟畦耆葺沏萋骐鳍綦讫蕲屺颀亓碛柒啐汔綮萁嘁蛴槭欹芑桤丌蜞" }, { "chuai", "揣踹啜搋膪" }, { "tuo", "托脱拓拖妥驼陀沱鸵驮唾椭坨佗砣跎庹柁橐乇铊沲酡鼍箨柝" }, { "duo", "多度夺朵躲铎隋咄堕舵垛惰哆踱跺掇剁柁缍沲裰哚隳" }, { 
"xue", "学血雪削薛穴靴谑噱鳕踅泶彐" }, { "chong", "重种充冲涌崇虫宠忡憧舂茺铳艟蟲翀" }, { "chou", "筹抽绸酬愁丑臭仇畴稠瞅踌惆俦瘳雠帱" }, { "qiu", "求球秋丘邱仇酋裘龟囚遒鳅虬蚯泅楸湫犰逑巯艽俅蝤赇鼽糗" }, { "xiu", "修秀休宿袖绣臭朽锈羞嗅岫溴庥馐咻髹鸺貅飍" }, { "chu", "出处础初助除储畜触楚厨雏矗橱锄滁躇怵绌搐刍蜍黜杵蹰亍樗憷楮" }, { "tuan", "团揣湍疃抟彖" }, {
上一篇: 在Java中部署PaddleOCR
下一篇: 全面解读:汉字的Unicode编码表