Module polib
[hide private]
[frames | no frames]

Source Code for Module polib

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  # 
   4  # License: MIT (see LICENSE file provided) 
   5  # vim600: fdm=marker tabstop=4 shiftwidth=4 expandtab ai 
   6   
   7  # Description {{{ 
   8  """ 
   9  **polib** allows you to manipulate, create, modify gettext files (pot, po 
  10  and mo files).  You can load existing files, iterate through their entries,
  11  add, modify entries, comments or metadata, etc... or create new po files 
  12  from scratch. 
  13   
  14  **polib** provides a simple and pythonic API, exporting only three 
  15  convenience functions (*pofile*, *mofile* and *detect_encoding*), and the 
  16  four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating 
  17  new files/entries. 
  18   
  19  **Basic example**: 
  20   
  21  >>> import polib 
  22  >>> # load an existing po file 
  23  >>> po = polib.pofile('tests/test_utf8.po') 
  24  >>> for entry in po: 
  25  ...     # do something with entry... 
  26  ...     pass 
  27  >>> # add an entry 
  28  >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue') 
  29  >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')] 
  30  >>> po.append(entry) 
  31  >>> # to save our modified po file: 
  32  >>> # po.save() 
  33  >>> # or you may want to compile the po file 
  34  >>> # po.save_as_mofile('tests/test_utf8.mo') 
  35  """ 
  36  # }}} 
  37   
  38  __author__    = 'David JEAN LOUIS <izimobil@gmail.com>' 
  39  __version__   = '0.3.1' 
  40   
  41   
  42  # dependencies {{{ 
  43  try: 
  44      import struct 
  45      import textwrap 
  46      import warnings 
  47  except ImportError, exc: 
  48      raise ImportError('polib requires python 2.3 or later with the standard' \ 
  49          ' modules "struct", "textwrap" and "warnings" (details: %s)' % exc) 
  50  # }}} 
  51   
  52  __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
  53             'detect_encoding', 'quote', 'unquote']
  54
  55  # shortcuts for performance improvement: builtin methods bound once to module globals {{{
  56  # ugly but efficient; note they are the *str*/list/dict methods, so e.g. _strreplace rejects unicode objects
  57  _dictget    = dict.get
  58  _listappend = list.append
  59  _listpop    = list.pop
  60  _strjoin    = str.join
  61  _strsplit   = str.split
  62  _strstrip   = str.strip
  63  _strreplace = str.replace
  64  _textwrap   = textwrap.wrap
  65  # }}}
  66
  67  encoding = 'utf-8'  # module-wide charset; reassigned by pofile(), mofile() and detect_encoding()
  68
def pofile(fpath, wrapwidth=78, autodetect_encoding=True):
    """
    Parse the po/pot file *fpath* and hand back the resulting POFile
    instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po/pot file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext, default to 78 (optional)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding

    **Example**:

    >>> import polib
    >>> po = polib.pofile('tests/test_utf8.po')
    >>> po #doctest: +ELLIPSIS
    <POFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
    ...     orig_po = polib.pofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_po.save(tmpf)
    ...     try:
    ...         new_po = polib.pofile(tmpf)
    ...         for old, new in zip(orig_po, new_po):
    ...             if old.msgid != new.msgid:
    ...                 old.msgid
    ...                 new.msgid
    ...             if old.msgstr != new.msgstr:
    ...                 old.msgid
    ...                 new.msgid
    ...     finally:
    ...         os.unlink(tmpf)
    """
    # pofile {{{
    global encoding
    if autodetect_encoding == True:
        # the detected charset is stored module-wide; entries read it back
        # when they encode unicode strings
        encoding = detect_encoding(fpath)
    po = _POFileParser(fpath).parse()
    po.wrapwidth = wrapwidth
    return po
    # }}}


def mofile(fpath, wrapwidth=78, autodetect_encoding=True):
    """
    Parse the mo file *fpath* and hand back the resulting MOFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the mo file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext to generate the po file that was used to format
        the mo file, default to 78 (optional)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding

    **Example**:

    >>> import polib
    >>> mo = polib.mofile('tests/test_utf8.mo')
    >>> mo #doctest: +ELLIPSIS
    <MOFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
    ...     orig_mo = polib.mofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_mo.save(tmpf)
    ...     try:
    ...         new_mo = polib.mofile(tmpf)
    ...         for old, new in zip(orig_mo, new_mo):
    ...             if old.msgid != new.msgid:
    ...                 old.msgstr
    ...                 new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    # mofile {{{
    global encoding
    if autodetect_encoding == True:
        # the detected charset is stored module-wide; entries read it back
        # when they encode unicode strings
        encoding = detect_encoding(fpath)
    mo = _MOFileParser(fpath).parse()
    mo.wrapwidth = wrapwidth
    return mo
    # }}}


def detect_encoding(fpath):
    """
    Try to detect the encoding used by the file *fpath*. The function will
    return polib default *encoding* if it's unable to detect it.

    The file is scanned line by line for a ``Content-Type: ...; charset=X``
    header and the first charset found is returned.  As a side effect the
    module-wide *encoding* is reset to ``'utf-8'`` on every call.

    **Keyword argument**:
      - *fpath*: string, full or relative path to the po or mo file to scan.

    **Raises**: IOError if the file cannot be opened or read.

    **Examples**:

    >>> print detect_encoding('tests/test_noencoding.po')
    utf-8
    >>> print detect_encoding('tests/test_utf8.po')
    UTF-8
    >>> print detect_encoding('tests/test_utf8.mo')
    UTF-8
    >>> print detect_encoding('tests/test_iso-8859-15.po')
    ISO_8859-15
    >>> print detect_encoding('tests/test_iso-8859-15.mo')
    ISO_8859-15
    """
    # detect_encoding {{{
    import re
    global encoding
    # historical side effect, kept for compatibility: the module default is
    # restored before the file is inspected
    encoding = 'utf-8'
    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
    fhandle = open(fpath)
    try:
        for line in fhandle:
            match = rx.search(line)
            if match:
                return match.group(1).strip()
    finally:
        # release the handle even when reading fails half-way through
        fhandle.close()
    return encoding
    # }}}


def quote(st):
    """
    Quote and return the given string *st*.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their two-character escape sequences.  Both byte strings
    and unicode strings are accepted; the result has the type of *st*.

    **Examples**:

    >>> quote('\\t and \\n and \\r and " and \\\\')
    '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
    """
    # quote {{{
    # the backslash has to be escaped first, otherwise the backslashes
    # introduced by the following replacements would be doubled again
    replacements = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('"', r'\"'),
    )
    for char, escaped in replacements:
        # plain method call (not the str.replace alias) so unicode works too
        st = st.replace(char, escaped)
    return st
    # }}}


def unquote(st):
    """
    Unquote and return the given string *st*.

    The escape sequences produced by *quote* (backslash followed by one of
    ``\\``, ``n``, ``r``, ``t`` or ``"``) are decoded in a single left to
    right pass, so an escaped backslash followed by a letter (for instance
    in a Windows path) is not mistaken for another escape sequence.
    Unknown escape sequences are left untouched.

    **Examples**:

    >>> unquote('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
    '\\t and \\n and \\r and " and \\\\'
    """
    # unquote {{{
    import re
    escapes = {'\\': '\\', 'n': '\n', 'r': '\r', 't': '\t', '"': '"'}

    def unescape(match):
        # keep the whole sequence when the escaped char is not a known one
        return escapes.get(match.group(1), match.group(0))

    return re.sub(r'\\(.)', unescape, st)
    # }}}


237 -class _BaseFile(list):
238 """ 239 Common parent class for POFile and MOFile classes. 240 This class must **not** be instanciated directly. 241 """ 242 # class _BaseFile {{{ 243 244
245 - def __init__(self, fpath=None, wrapwidth=78):
246 """ 247 Constructor. 248 249 **Keyword arguments**: 250 - *fpath*: string, path to po or mo file 251 - *wrapwidth*: integer, the wrap width, only useful when -w option 252 was passed to xgettext to generate the po file that was used to 253 format the mo file, default to 78 (optional). 254 """ 255 list.__init__(self) 256 # the opened file handle 257 self.fpath = fpath 258 # the width at which lines should be wrapped 259 self.wrapwidth = wrapwidth 260 # header 261 self.header = '' 262 # both po and mo files have metadata 263 self.metadata = {} 264 self.metadata_is_fuzzy = 0
265
266 - def __str__(self):
267 """String representation of the file.""" 268 ret = [] 269 entries = [self.metadata_as_entry()] + self 270 for entry in entries: 271 _listappend(ret, entry.__str__(self.wrapwidth)) 272 return _strjoin('\n', ret)
273
274 - def __repr__(self):
275 """Return the official string representation of the object.""" 276 return '<%s instance at %d>' % (self.__class__.__name__, id(self))
277
278 - def metadata_as_entry(self):
279 """Return the metadata as an entry""" 280 e = POEntry(msgid='') 281 mdata = self.ordered_metadata() 282 if mdata: 283 strs = [] 284 for name, value in mdata: 285 # Strip whitespace off each line in a multi-line entry 286 value = _strjoin('\n', [_strstrip(v) 287 for v in _strsplit(value, '\n')]) 288 _listappend(strs, '%s: %s' % (name, value)) 289 e.msgstr = _strjoin('\n', strs) + '\n' 290 return e
291
292 - def save(self, fpath=None, repr_method='__str__'):
293 """ 294 Save the po file to file *fpath* if no file handle exists for 295 the object. If there's already an open file and no fpath is 296 provided, then the existing file is rewritten with the modified 297 data. 298 299 **Keyword arguments**: 300 - *fpath*: string, full or relative path to the file. 301 - *repr_method*: string, the method to use for output. 302 """ 303 if self.fpath is None and fpath is None: 304 raise IOError('You must provide a file path to save() method') 305 contents = getattr(self, repr_method)() 306 if fpath is None: 307 fpath = self.fpath 308 mode = 'w' 309 if repr_method == 'to_binary': 310 mode += 'b' 311 fhandle = open(fpath, mode) 312 fhandle.write(contents) 313 fhandle.close()
314
315 - def find(self, st, by='msgid'):
316 """ 317 Find entry which msgid (or property identified by the *by* 318 attribute) matches the string *st*. 319 320 **Examples**: 321 322 >>> po = pofile('tests/test_utf8.po') 323 >>> entry = po.find('Thursday') 324 >>> entry.msgstr 325 'Jueves' 326 >>> entry = po.find('Some unexistant msgid') 327 >>> entry is None 328 True 329 >>> entry = po.find('Jueves', 'msgstr') 330 >>> entry.msgid 331 'Thursday' 332 """ 333 try: 334 return [e for e in self if getattr(e, by) == st][0] 335 except IndexError: 336 return None
337
338 - def ordered_metadata(self):
339 """ 340 Convenience method that return the metadata ordered. The return 341 value is list of tuples (metadata name, metadata_value). 342 """ 343 # copy the dict first 344 metadata = self.metadata.copy() 345 data_order = [ 346 'Project-Id-Version', 347 'Report-Msgid-Bugs-To', 348 'POT-Creation-Date', 349 'PO-Revision-Date', 350 'Last-Translator', 351 'Language-Team', 352 'MIME-Version', 353 'Content-Type', 354 'Content-Transfer-Encoding' 355 ] 356 ordered_data = [] 357 for data in data_order: 358 try: 359 value = metadata.pop(data) 360 _listappend(ordered_data, (data, value)) 361 except KeyError: 362 pass 363 # the rest of the metadata won't be ordered there are no specs for this 364 keys = metadata.keys() 365 keys.sort() 366 for data in keys: 367 value = metadata[data] 368 _listappend(ordered_data, (data, value)) 369 return ordered_data
370
371 - def to_binary(self):
372 """Return the mofile binary representation.""" 373 import struct 374 import array 375 output = '' 376 offsets = [] 377 ids = strs = '' 378 entries = self.translated_entries() 379 # the keys are sorted in the .mo file 380 def cmp(_self, other): 381 if _self.msgid > other.msgid: 382 return 1 383 elif _self.msgid < other.msgid: 384 return -1 385 else: 386 return 0
387 entries.sort(cmp) 388 # add metadata entry 389 mentry = self.metadata_as_entry() 390 mentry.msgstr = _strreplace(mentry.msgstr, '\\n', '').lstrip() + '\n' 391 entries = [mentry] + entries 392 entries_len = len(entries) 393 for e in entries: 394 # For each string, we need size and file offset. Each string is NUL 395 # terminated; the NUL does not count into the size. 396 msgid = e._decode(e.msgid) 397 msgstr = e._decode(e.msgstr) 398 offsets.append((len(ids), len(msgid), len(strs), len(msgstr))) 399 ids += msgid + '\0' 400 strs += msgstr + '\0' 401 # The header is 7 32-bit unsigned integers. 402 keystart = 7*4+16*entries_len 403 # and the values start after the keys 404 valuestart = keystart + len(ids) 405 koffsets = [] 406 voffsets = [] 407 # The string table first has the list of keys, then the list of values. 408 # Each entry has first the size of the string, then the file offset. 409 for o1, l1, o2, l2 in offsets: 410 koffsets += [l1, o1+keystart] 411 voffsets += [l2, o2+valuestart] 412 offsets = koffsets + voffsets 413 output = struct.pack("Iiiiiii", 414 0x950412de, # Magic number 415 0, # Version 416 entries_len, # # of entries 417 7*4, # start of key index 418 7*4+entries_len*8, # start of value index 419 0, 0) # size and offset of hash table 420 output += array.array("i", offsets).tostring() 421 output += ids 422 output += strs 423 return output
424 # }}} 425 426
class POFile(_BaseFile):
    '''
    Po (or Pot) file reader/writer.
    POFile objects inherit the list objects methods.

    **Example**:

    >>> po = POFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
    >>> entry1.comment = "Some useful comment"
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
    >>> entry2.comment = "Another useful comment"
    >>> entry3 = POEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr=u'Un message unicode avec des quotes " \\"'
    ... )
    >>> entry3.comment = "Test string quoting"
    >>> po.append(entry1)
    >>> po.append(entry2)
    >>> po.append(entry3)
    >>> po.header = "Some Header"
    >>> print po
    # Some Header
    msgid ""
    msgstr ""
    <BLANKLINE>
    #. Some useful comment
    #: testfile:12 another_file:1
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    #. Another useful comment
    #: testfile:15 another_file:5
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    #. Test string quoting
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''
    # class POFile {{{

    def __str__(self):
        """
        Return the string representation of the po file: the header, one
        comment line per header line, followed by the entries.
        """
        lines = []
        for header in self.header.split('\n'):
            if header[:1] in [',', ':']:
                # flag and reference lines take no space after the '#'
                lines.append('#%s\n' % header)
            else:
                lines.append('# %s\n' % header)
        return ''.join(lines) + _BaseFile.__str__(self)

    def save_as_mofile(self, fpath):
        """
        Save the binary representation of the file to *fpath*.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method that return the percentage of translated
        messages, as an integer (rounded down).  Obsolete entries are not
        counted; a file without any live entry is reported as 100%
        translated since nothing is left to translate.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> po.percent_translated()
        50
        """
        total = len([e for e in self if not e.obsolete])
        if total == 0:
            return 100
        translated = len(self.translated_entries())
        # multiply first: translated * 100.0 is exact, so a fully translated
        # file always yields 100 instead of a rounded-down 99
        return int(translated * 100.0 / total)

    def translated_entries(self):
        """
        Convenience method that return a list of translated entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.translated_entries())
        5
        """
        return [e for e in self if e.translated() and not e.obsolete]

    def untranslated_entries(self):
        """
        Convenience method that return a list of untranslated entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.untranslated_entries())
        5
        """
        return [e for e in self if not e.translated() and not e.obsolete]

    def fuzzy_entries(self):
        """
        Convenience method that return the list of 'fuzzy' entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.fuzzy_entries())
        2
        """
        return [e for e in self if 'fuzzy' in e.flags]

    def obsolete_entries(self):
        """
        Convenience method that return the list of obsolete entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.obsolete_entries())
        4
        """
        return [e for e in self if e.obsolete]
    # }}}


class MOFile(_BaseFile):
    '''
    Mo file reader/writer.
    MOFile objects inherit the list objects methods.

    **Example**:

    >>> mo = MOFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> entry3 = MOEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr=u'Un message unicode avec des quotes " \\"'
    ... )
    >>> mo.append(entry1)
    >>> mo.append(entry2)
    >>> mo.append(entry3)
    >>> print mo
    msgid ""
    msgstr ""
    <BLANKLINE>
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''
    # class MOFile {{{

    def __init__(self, fpath=None, wrapwidth=78):
        """
        MOFile constructor.
        See _BaseFile.__init__ for the meaning of the arguments.
        """
        _BaseFile.__init__(self, fpath, wrapwidth)
        self.magic_number = None
        self.version = 0

    def save_as_pofile(self, fpath):
        """
        Save the string representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath)

    def save(self, fpath=None):
        """
        Save the binary representation of the file to *fpath*, or to the
        path the object was created with when *fpath* is not provided.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file (optional).

        **Raises**: IOError when no path is available or writing fails.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always 100.
        """
        return 100

    def translated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Returns the file object itself (every entry counts as translated).
        """
        return self

    def untranslated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always an empty list.
        """
        return []

    def fuzzy_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always an empty list.
        """
        return []

    def obsolete_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always an empty list.
        """
        return []
    # }}}


class _BaseEntry(object):
    """
    Shared behaviour of POEntry and MOEntry objects.
    This class must *not* be instantiated directly.
    """
    # class _BaseEntry {{{

    def __init__(self, *args, **kwargs):
        """
        Base Entry constructor.  Reads the optional keyword arguments
        *msgid*, *msgstr*, *msgid_plural*, *msgstr_plural* and *obsolete*.
        """
        option = kwargs.get
        self.msgid = option('msgid', '')
        self.msgstr = option('msgstr', '')
        self.msgid_plural = option('msgid_plural', '')
        self.msgstr_plural = option('msgstr_plural', {})
        self.obsolete = option('obsolete', False)

    def __repr__(self):
        """Return the official string representation of the object."""
        return '<%s instance at %x>' % (self.__class__.__name__, id(self))

    def __str__(self, wrapwidth=78):
        """
        Build the part of the textual form that POEntry and MOEntry have
        in common: msgid, optional msgid_plural, then either the indexed
        plural msgstr lines or the single msgstr, and a closing empty line.
        """
        # obsolete entries are written behind a '#~ ' marker
        delflag = ''
        if self.obsolete:
            delflag = '#~ '
        out = self._str_field("msgid", delflag, "", self.msgid)
        if self.msgid_plural:
            out += self._str_field("msgid_plural", delflag, "",
                                   self.msgid_plural)
        if not self.msgstr_plural:
            # no plural forms: a single msgstr
            out += self._str_field("msgstr", delflag, "", self.msgstr)
        else:
            # one msgstr[N] per plural form, in index order
            plurals = self.msgstr_plural
            indexes = list(plurals.keys())
            indexes.sort()
            for index in indexes:
                out += self._str_field("msgstr", delflag, '[%s]' % index,
                                       plurals[index])
        out.append('')
        return '\n'.join(out)

    def _str_field(self, fieldname, delflag, plural_index, field):
        """Return the list of output lines for one (possibly multi-line) field."""
        field = self._decode(field)
        # keep line breaks in strings
        # potentially, we could do line-wrapping here, but textwrap.wrap
        # treats whitespace too carelessly for us to use it.
        pieces = field.splitlines(True)
        if len(pieces) > 1:
            # multi-line values start with an empty string on the keyword line
            pieces.insert(0, '')
        else:
            # also covers the empty string, for which splitlines() is empty
            pieces = [field]
        head, tail = pieces[0], pieces[1:]
        first = '%s%s%s "%s"' % (delflag, fieldname, plural_index, quote(head))
        return [first] + ['%s"%s"' % (delflag, quote(piece)) for piece in tail]

    def _decode(self, st):
        """Return *st* as a byte string, encoding unicode with the module *encoding*."""
        if not isinstance(st, unicode):
            return st
        return st.encode(encoding)
    # }}}


class POEntry(_BaseEntry):
    """
    Represents a po file entry.

    **Examples**:

    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
    >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
    >>> print entry
    #: welcome.py:12 anotherfile.py:34
    msgid "Welcome"
    msgstr "Bienvenue"
    <BLANKLINE>
    >>> entry = POEntry()
    >>> entry.occurrences = [('src/spam.c', 32), ('src/eggs.c', 45)]
    >>> entry.tcomment = 'A plural translation'
    >>> entry.flags.append('c-format')
    >>> entry.msgid = 'I have spam but no egg !'
    >>> entry.msgid_plural = 'I have spam and %d eggs !'
    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
    >>> print entry
    # A plural translation
    #: src/spam.c:32 src/eggs.c:45
    #, c-format
    msgid "I have spam but no egg !"
    msgid_plural "I have spam and %d eggs !"
    msgstr[0] "J'ai du jambon mais aucun oeuf !"
    msgstr[1] "J'ai du jambon et %d oeufs !"
    <BLANKLINE>
    """
    # class POEntry {{{

    def __init__(self, *args, **kwargs):
        """
        POEntry constructor.  On top of the _BaseEntry keyword arguments,
        reads *comment*, *tcomment*, *occurrences* and *flags*.
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = kwargs.get('comment', '')
        self.tcomment = kwargs.get('tcomment', '')
        self.occurrences = kwargs.get('occurrences', [])
        # XXX will be removed in next version
        if kwargs.get('occurences') is not None:
            self.occurences = kwargs.get('occurences')
        self.flags = kwargs.get('flags', [])

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry: generated comment,
        translator comment, occurrences and flags lines, followed by the
        msgid/msgstr part.  Lines longer than *wrapwidth* are wrapped
        (no wrapping at all when *wrapwidth* is 0 or negative).
        """
        ret = []
        # comment first, if any (with text wrapping as xgettext does)
        if self.comment != '':
            ret += self._str_comment(self.comment, '#. ', wrapwidth)
        # translator comment, if any (with text wrapping as xgettext does)
        if self.tcomment != '':
            ret += self._str_comment(self.tcomment, '# ', wrapwidth)
        # occurrences (with text wrapping as xgettext does)
        if self.occurrences:
            filelist = []
            for fpath, lineno in self.occurrences:
                fpath = self._decode(fpath)
                if lineno == '' or lineno is None:
                    # reference without a line number: no trailing colon
                    filelist.append(fpath)
                else:
                    filelist.append('%s:%s' % (fpath, lineno))
            filestr = ' '.join(filelist)
            if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
                # XXX textwrap split words that contain hyphen, this is not
                # what we want for filenames, so the dirty hack is to
                # temporally replace hyphens with a char that a file cannot
                # contain, like "*"
                protected = filestr.replace('-', '*')
                lines = _textwrap(protected, wrapwidth,
                                  initial_indent='#: ',
                                  subsequent_indent='#: ',
                                  break_long_words=False)
                # end of the replace hack
                for line in lines:
                    ret.append(line.replace('*', '-'))
            else:
                ret.append('#: '+filestr)
        # flags
        if self.flags:
            ret.append('#, %s' % ', '.join(self.flags))
        ret.append(_BaseEntry.__str__(self, wrapwidth))
        return '\n'.join(ret)

    def _str_comment(self, text, prefix, wrapwidth):
        """Return the output lines of a comment, each starting with *prefix*."""
        ret = []
        for line in self._decode(text).split('\n'):
            if wrapwidth > 0 and len(line) > wrapwidth - len(prefix):
                # textwrap returns a list of lines: extend, don't append
                ret += _textwrap(line, wrapwidth,
                                 initial_indent=prefix,
                                 subsequent_indent=prefix,
                                 break_long_words=False)
            else:
                ret.append('%s%s' % (prefix, line))
        return ret

    def translated(self):
        """Return True if the entry has been translated or False"""
        return bool((self.msgstr != '' or self.msgstr_plural) and
                    (not self.obsolete and 'fuzzy' not in self.flags))

    def __getattr__(self, name):
        """
        Only called for attributes that normal lookup did not find: serves
        the deprecated *occurences* spelling, fails for anything else.
        """
        if name == 'occurences':
            warnings.warn(
                '"occurences" property is deprecated (it was a typo), '\
                'please use "occurrences" instead'
            )
            return self.occurrences
        raise AttributeError("'%s' object has no attribute '%s'" %
                             (self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """Redirect the deprecated *occurences* spelling to *occurrences*."""
        if name == 'occurences':
            warnings.warn(
                '"occurences" property is deprecated (it was a typo), '\
                'please use "occurrences" instead'
            )
            self.occurrences = value
        else:
            object.__setattr__(self, name, value)

    # }}}


class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print entry
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """
    # class MOEntry {{{

    def __str__(self, wrapwidth=78):
        """
        Textual form of the entry; a mo entry has nothing to add to the
        common msgid/msgstr output of the base class.
        """
        text = _BaseEntry.__str__(self, wrapwidth)
        return text
    # }}}


890 -class _POFileParser(object):
891 """ 892 A finite state machine to parse efficiently and correctly po 893 file format. 894 """ 895 # class _POFileParser {{{
896 - def __init__(self, fpath):
897 """ 898 Constructor. 899 900 **Keyword argument**: 901 - *fpath*: string, path to the po file 902 """ 903 self.fhandle = open(fpath, 'r') 904 self.instance = POFile(fpath=fpath) 905 self.transitions = {} 906 self.current_entry = POEntry() 907 self.current_state = 'ST' 908 self.current_token = None 909 # two memo flags used in handlers 910 self.msgstr_index = 0 911 self.entry_obsolete = 0 912 # Configure the state machine, by adding transitions. 913 # Signification of symbols: 914 # * ST: Beginning of the file (start) 915 # * HE: Header 916 # * TC: a translation comment 917 # * GC: a generated comment 918 # * OC: a file/line occurence 919 # * FL: a flags line 920 # * MI: a msgid 921 # * MP: a msgid plural 922 # * MS: a msgstr 923 # * MX: a msgstr plural 924 # * MC: a msgid or msgstr continuation line 925 all_ = ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'] 926 927 self.add('TC', ['ST', 'HE'], 'HE') 928 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'], 'TC') 929 self.add('GC', all_, 'GC') 930 self.add('OC', all_, 'OC') 931 self.add('FL', all_, 'FL') 932 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MX'], 'MI') 933 self.add('MP', ['TC', 'GC', 'MI'], 'MP') 934 self.add('MS', ['MI', 'MP', 'TC'], 'MS') 935 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX') 936 self.add('MC', ['MI', 'MP', 'MS', 'MX'], 'MC')
937
938 - def parse(self):
939 """ 940 Run the state machine, parse the file line by line and call process() 941 with the current matched symbol. 942 """ 943 i, lastlen = 1, 0 944 for line in self.fhandle: 945 line = _strstrip(line) 946 if line == '': 947 i = i+1 948 continue 949 if line[:3] == '#~ ': 950 line = line[3:] 951 self.entry_obsolete = 1 952 else: 953 self.entry_obsolete = 0 954 self.current_token = line 955 if line[:2] == '#:': 956 # we are on a occurrences line 957 self.process('OC', i) 958 elif line[:7] == 'msgid "': 959 # we are on a msgid 960 self.process('MI', i) 961 elif line[:8] == 'msgstr "': 962 # we are on a msgstr 963 self.process('MS', i) 964 elif line[:1] == '"': 965 # we are on a continuation line or some metadata 966 self.process('MC', i) 967 elif line[:14] == 'msgid_plural "': 968 # we are on a msgid plural 969 self.process('MP', i) 970 elif line[:7] == 'msgstr[': 971 # we are on a msgstr plural 972 self.process('MX', i) 973 elif line[:3] == '#, ': 974 # we are on a flags line 975 self.process('FL', i) 976 elif line[:2] == '# ' or line == '#': 977 if line == '#': line = line + ' ' 978 # we are on a translator comment line 979 self.process('TC', i) 980 elif line[:2] == '#.': 981 # we are on a generated comment line 982 self.process('GC', i) 983 i = i+1 984 985 if self.current_entry: 986 # since entries are added when another entry is found, we must add 987 # the last entry here (only if there are lines) 988 _listappend(self.instance, self.current_entry) 989 # before returning the instance, check if there's metadata and if 990 # so extract it in a dict 991 firstentry = self.instance[0] 992 if firstentry.msgid == '': # metadata found 993 # remove the entry 994 firstentry = _listpop(self.instance, 0) 995 self.instance.metadata_is_fuzzy = firstentry.flags 996 key = None 997 for msg in firstentry.msgstr.splitlines(): 998 try: 999 key, val = _strsplit(msg, ':', 1) 1000 self.instance.metadata[key] = _strstrip(val) 1001 except: 1002 if key is not None: 1003 
self.instance.metadata[key] += '\n'+_strstrip(msg) 1004 # close opened file 1005 self.fhandle.close() 1006 return self.instance
1007
1008 - def add(self, symbol, states, next_state):
1009 """ 1010 Add a transition to the state machine. 1011 Keywords arguments: 1012 1013 symbol -- string, the matched token (two chars symbol) 1014 states -- list, a list of states (two chars symbols) 1015 next_state -- the next state the fsm will have after the action 1016 """ 1017 for state in states: 1018 action = getattr(self, 'handle_%s' % next_state.lower()) 1019 self.transitions[(symbol, state)] = (action, next_state)
1020
1021 - def process(self, symbol, linenum):
1022 """ 1023 Process the transition corresponding to the current state and the 1024 symbol provided. 1025 1026 Keywords arguments: 1027 symbol -- string, the matched token (two chars symbol) 1028 linenum -- integer, the current line number of the parsed file 1029 """ 1030 try: 1031 (action, state) = self.transitions[(symbol, self.current_state)] 1032 if action(): 1033 self.current_state = state 1034 except Exception, e: 1035 raise IOError('Syntax error in po file (line %s): %s' % \ 1036 (linenum, e))
1037 1038 # state handlers 1039
1040 - def handle_he(self):
1041 """Handle a header comment.""" 1042 if self.instance.header != '': 1043 self.instance.header += '\n' 1044 self.instance.header += self.current_token[2:] 1045 return 1
1046
1047 - def handle_tc(self):
1048 """Handle a translator comment.""" 1049 if self.current_state in ['MC', 'MS', 'MX']: 1050 _listappend(self.instance, self.current_entry) 1051 self.current_entry = POEntry() 1052 if self.current_entry.tcomment != '': 1053 self.current_entry.tcomment += '\n' 1054 self.current_entry.tcomment += self.current_token[2:] 1055 return True
1056
1057 - def handle_gc(self):
1058 """Handle a generated comment.""" 1059 if self.current_state in ['MC', 'MS', 'MX']: 1060 _listappend(self.instance, self.current_entry) 1061 self.current_entry = POEntry() 1062 if self.current_entry.comment != '': 1063 self.current_entry.comment += '\n' 1064 self.current_entry.comment += self.current_token[3:] 1065 return True
1066
def handle_oc(self):
    """
    Handle a file:num occurrence line.

    When the parser is in state MC, MS or MX the entry being built is
    first appended to the instance and a fresh POEntry is started.  The
    token, minus its first three characters, is split into references
    and each one is stored on the current entry as a ``(file, line)``
    tuple of strings.  Always returns True.

    A reference without a line number is stored as ``(reference, '')``.
    When a reference contains several colons only the text after the
    last one is taken as the line, so the file part may itself contain
    colons.
    """
    if self.current_state in ['MC', 'MS', 'MX']:
        _listappend(self.instance, self.current_entry)
        self.current_entry = POEntry()
    for occurrence in _strsplit(self.current_token[3:]):
        if occurrence == '':
            continue
        # NOTE(review): assumes _strsplit returns a list of strings the
        # way str.split does -- confirm against its definition.
        parts = _strsplit(occurrence, ':')
        if len(parts) == 1:
            # no colon at all: a file-only reference
            fil, line = occurrence, ''
        else:
            fil, line = ':'.join(parts[:-1]), parts[-1]
        _listappend(self.current_entry.occurrences, (fil, line))
    return True
1078
def handle_fl(self):
    """
    Handle a flags line.

    When the parser is in state MC, MS or MX the entry being built is
    first appended to the instance and a fresh POEntry is started.  The
    token, minus its first three characters, is split on ``', '`` and
    the resulting items are added to the current entry's flags.
    Always returns True.
    """
    previous_entry_done = self.current_state in ['MC', 'MS', 'MX']
    if previous_entry_done:
        _listappend(self.instance, self.current_entry)
        self.current_entry = POEntry()
    entry = self.current_entry
    entry.flags += _strsplit(self.current_token[3:], ', ')
    return True
1086
def handle_mi(self):
    """
    Handle a msgid.

    When the parser is in state MC, MS or MX the entry being built is
    first appended to the instance and a fresh POEntry is started.  The
    current entry then receives the parser's ``entry_obsolete`` flag and
    its msgid is set to the unquoted token content (characters 7 up to,
    but not including, the last one).  Always returns True.
    """
    previous_entry_done = self.current_state in ['MC', 'MS', 'MX']
    if previous_entry_done:
        _listappend(self.instance, self.current_entry)
        self.current_entry = POEntry()
    entry = self.current_entry
    entry.obsolete = self.entry_obsolete
    quoted = self.current_token[7:-1]
    entry.msgid = unquote(quoted)
    return True
1095
def handle_mp(self):
    """
    Handle a msgid plural.

    Sets the current entry's msgid_plural to the unquoted token content
    (characters 14 up to, but not including, the last one).
    Always returns True.
    """
    quoted = self.current_token[14:-1]
    self.current_entry.msgid_plural = unquote(quoted)
    return True
1100
def handle_ms(self):
    """
    Handle a msgstr.

    Sets the current entry's msgstr to the unquoted token content
    (characters 8 up to, but not including, the last one).
    Always returns True.
    """
    quoted = self.current_token[8:-1]
    self.current_entry.msgstr = unquote(quoted)
    return True
1105
def handle_mx(self):
    """
    Handle a msgstr plural.

    The single character at position 7 of the token is used as the
    plural index and the unquoted text from position 11 up to, but not
    including, the last character as its translation.  The index is
    remembered in ``msgstr_index`` so that continuation lines can be
    appended to the same plural form.  Always returns True.
    """
    token = self.current_token
    plural_index, quoted = token[7], token[11:-1]
    self.current_entry.msgstr_plural[plural_index] = unquote(quoted)
    self.msgstr_index = plural_index
    return True
1112
def handle_mc(self):
    """
    Handle a msgid or msgstr continuation line.

    The unquoted token content (without its first and last characters)
    is appended to whichever field the current state designates:
    msgid (MI), msgid_plural (MP), msgstr (MS) or the plural form
    selected by ``msgstr_index`` (MX).  Any other state leaves the entry
    untouched.  Always returns False so the current state is kept.
    """
    state = self.current_state
    entry = self.current_entry
    if state == 'MI':
        entry.msgid = entry.msgid + unquote(self.current_token[1:-1])
    elif state == 'MP':
        entry.msgid_plural = entry.msgid_plural + \
            unquote(self.current_token[1:-1])
    elif state == 'MS':
        entry.msgstr = entry.msgstr + unquote(self.current_token[1:-1])
    elif state == 'MX':
        plural_index = self.msgstr_index
        extended = entry.msgstr_plural[plural_index] + \
            unquote(self.current_token[1:-1])
        entry.msgstr_plural[plural_index] = extended
    # don't change the current state
    return False
1128 # }}} 1129 1130
class _MOFileParser(object):
    """
    A class to parse binary mo files.

    The parser opens the file in its constructor, ``parse()`` fills and
    returns a MOFile instance and closes the file again.
    """
    # class _MOFileParser {{{

    # The magic number 0x950412de as its four bytes appear at the start
    # of a little endian, respectively big endian, mo file.
    _MAGIC_LITTLE_ENDIAN = struct.pack('4B', 0xde, 0x12, 0x04, 0x95)
    _MAGIC_BIG_ENDIAN = struct.pack('4B', 0x95, 0x04, 0x12, 0xde)

    def __init__(self, fpath):
        """
        _MOFileParser constructor.

        Keyword arguments:
        fpath -- string, path of the mo file; it is opened in binary
                 mode and also passed to the MOFile being built
        """
        self.fhandle = open(fpath, 'rb')
        self.instance = MOFile(fpath)

    def parse_magicnumber(self):
        """
        Parse the magic number and raise an exception if not valid.

        Reads four bytes from the file handle, stores them unchanged in
        ``instance.magic_number`` and raises IOError when they match
        neither byte order of the magic number.
        """
        magic_number = self._readbinary(fmt='4s')
        # magic number must be 0xde120495 or 0x950412de
        valid = [self._MAGIC_LITTLE_ENDIAN, self._MAGIC_BIG_ENDIAN]
        if magic_number not in valid:
            raise IOError('Invalid mo file, magic number is incorrect !')
        self.instance.magic_number = magic_number

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Returns the populated instance.  The file handle is closed when
        the method exits, whether parsing succeeded or raised.
        """
        try:
            self.parse_magicnumber()
            # Header fields are 32 bit integers stored in the byte order
            # announced by the magic number.  Explicit '<'/'>' formats
            # keep the field size at 4 bytes on every platform, which
            # native 'L'/'P' formats do not guarantee.
            if self.instance.magic_number == self._MAGIC_LITTLE_ENDIAN:
                order = '<'
            else:
                order = '>'
            word, pair = order + 'I', order + 'II'
            # parse the version number
            self.instance.version = self._readbinary(word)
            # parse the number of strings
            numofstrings = self._readbinary(word)
            # offset of the table describing the original strings
            msgids_table_offset = self._readbinary(word)
            # offset of the table describing the translated strings
            msgstrs_table_offset = self._readbinary(word)
            # move to msgid table and read length and offset of msgids
            self.fhandle.seek(msgids_table_offset)
            msgids_index = [self._readbinary(pair)
                            for i in range(numofstrings)]
            # move to msgstr table and read length and offset of msgstrs
            self.fhandle.seek(msgstrs_table_offset)
            msgstrs_index = [self._readbinary(pair)
                             for i in range(numofstrings)]
            # build entries
            for i in range(numofstrings):
                msgid_length, msgid_offset = msgids_index[i]
                self.fhandle.seek(msgid_offset)
                msgid = self.fhandle.read(msgid_length)
                msgstr_length, msgstr_offset = msgstrs_index[i]
                self.fhandle.seek(msgstr_offset)
                msgstr = self.fhandle.read(msgstr_length)
                if i == 0: # metadata
                    raw_metadata, metadata = _strsplit(msgstr, '\n'), {}
                    for line in raw_metadata:
                        tokens = _strsplit(line, ':', 1)
                        if tokens[0] != '':
                            try:
                                metadata[tokens[0]] = _strstrip(tokens[1])
                            except IndexError:
                                # "key" line without any ':' separator
                                metadata[tokens[0]] = ''
                    self.instance.metadata = metadata
                    continue
                entry = MOEntry(msgid=msgid, msgstr=msgstr)
                _listappend(self.instance, entry)
            return self.instance
        finally:
            # close opened file, even when parsing failed
            self.fhandle.close()

    def _readbinary(self, fmt='c'):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.

        The number of bytes read from the file handle is the size
        ``struct`` computes for <fmt>.
        """
        numbytes = struct.calcsize(fmt)
        data = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, data)
        if len(tup) == 1:
            return tup[0]
        return tup
# }}}


if __name__ == '__main__':
    """
    **Main function**::
      - to **test** the module just run: *python polib.py [-v]*
      - to **profile** the module: *python polib.py -p <some_pofile.po>*
    """
    # main function {{{
    import sys
    if len(sys.argv) > 2 and sys.argv[1] == '-p':
        def test(f):
            # Load the file with the loader matching its name, then
            # convert it to a string so that work is profiled as well.
            if f.endswith('po'):
                p = pofile(f)
            else:
                p = mofile(f)
            s = str(p)
        import profile
        # %r yields a correctly escaped string literal, so paths that
        # contain backslashes or quotes reach test() unchanged.
        profile.run('test(%r)' % sys.argv[2])
    else:
        import doctest
        doctest.testmod()
    # }}}