pinyin.py 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
#coding=utf-8
"""
Created by Eric Lo on 2010-05-20.
Copyright (c) 2010 __lxneng@gmail.com__. http://lxneng.com All rights reserved.
"""
  6. from django.conf import settings
  7. class Pinyin():
  8. def __init__(self, data_path='Mandarin.dat'):
  9. self.dict = {}
  10. data_path = settings.BASE_DIR + "/libs/" + data_path
  11. for line in open(data_path):
  12. k, v = line.split('\t')
  13. self.dict[k] = v
  14. self.splitter = ''
  15. def get_pinyin(self, chars=u""):
  16. result = []
  17. for char in chars:
  18. key = "%X" % ord(char)
  19. try:
  20. result.append(self.dict[key].split(" ")[0].strip()[:-1].lower())
  21. except Exception, e:
  22. #print e
  23. result.append(char)
  24. return self.splitter.join(result)
  25. def get_pinyin_first(self, chars=u""):
  26. result = []
  27. for char in chars:
  28. key = "%X" % ord(char)
  29. try:
  30. result.append(self.dict[key].split(" ")[0].strip()[0].lower())
  31. except Exception, e:
  32. #print e
  33. result.append(char)
  34. return self.splitter.join(result)
  35. def get_initials(self, char=u''):
  36. try:
  37. return self.dict["%X" % ord(char)].split(" ")[0][0]
  38. except Exception, e:
  39. #print e
  40. return char