参考 flyerhzm 的 chinese_pinyin 这个 gem，我写了个 Python 版的汉字转拼音脚本，放在 GitHub 上：http://github.com/lxneng/xpinyin
#!/usr/bin/env python
# encoding: utf-8
"""
Created by Eric Lo on 2010-05-20.
Copyright (c) 2010 [email protected]__. http://lxneng.com All rights reserved.
"""


class Pinyin(object):
    """Convert Chinese characters to pinyin using a code-point lookup table.

    The table is loaded from a text file in which every line has the form
    ``<KEY>\\t<READINGS>``: ``KEY`` is the character's code point formatted
    with ``"%X"`` (upper-case hexadecimal) and ``READINGS`` is a
    space-separated list of readings. Only the first reading is ever used.

    Attributes:
        dict: Mapping from hexadecimal code-point key to the raw readings
            text exactly as read from the file (trailing newline included).
        splitter: String placed between the converted characters by
            ``get_pinyin``. Defaults to the empty string.
    """

    def __init__(self, data_path='./Mandarin.dat'):
        """Load the lookup table from *data_path*.

        Args:
            data_path: Path of the tab-separated data file. The default is
                resolved relative to the current working directory.

        Raises:
            IOError/OSError: If the file cannot be opened.
            ValueError: If a non-blank line contains no tab separator.
        """
        self.dict = {}
        # The context manager closes the file even if parsing fails.
        with open(data_path) as data_file:
            for line in data_file:
                # Tolerate blank lines (e.g. a trailing empty line).
                if not line.strip():
                    continue
                # Split on the first tab only, so a stray tab inside the
                # readings cannot break the two-way unpacking.
                k, v = line.split('\t', 1)
                self.dict[k] = v
        self.splitter = ''

    def get_pinyin(self, chars=u"你好吗"):
        """Return the lower-case pinyin for every character in *chars*.

        For each character the first reading in the table is taken, its last
        character is dropped (presumably a tone digit -- verify against the
        data file) and the rest is lower-cased. Characters that are not in
        the table are passed through unchanged. The pieces are joined with
        ``self.splitter``.

        Args:
            chars: Iterable of single characters, normally a unicode string.

        Returns:
            The joined conversion result.
        """
        result = []
        for char in chars:
            try:
                reading = self.dict["%X" % ord(char)].split(" ")[0]
            except (KeyError, TypeError):
                # KeyError: character not in the table.
                # TypeError: item is not a single character, so ord() fails.
                result.append(char)
            else:
                result.append(reading.strip()[:-1].lower())
        return self.splitter.join(result)

    def get_initials(self, char=u'你'):
        """Return the first letter of the first reading of *char*.

        The letter is returned exactly as stored in the table (no case
        conversion). If *char* is not a single character, is not in the
        table, or has an empty reading, *char* itself is returned.

        Args:
            char: A single character.

        Returns:
            The initial letter, or *char* when no reading is available.
        """
        try:
            return self.dict["%X" % ord(char)].split(" ")[0][0]
        except (KeyError, TypeError, IndexError):
            return char
用法:
In [1]: from xpinyin import Pinyin

In [2]: p = Pinyin()

In [3]: p.get_pinyin(u"上海")
Out[3]: 'shanghai'

In [4]: p.get_initials(u"上")
Out[4]: 'S'