1
2
3
4
5
6
7
8 """
9 **polib** allows you to manipulate, create, modify gettext files (pot, po
10 and mo files). You can load existing files, iterate through its entries,
11 add, modify entries, comments or metadata, etc... or create new po files
12 from scratch.
13
14 **polib** provides a simple and pythonic API, exporting only three
15 convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
16 four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
17 new files/entries.
18
19 **Basic example**:
20
21 >>> import polib
22 >>> # load an existing po file
23 >>> po = polib.pofile('tests/test_utf8.po')
24 >>> for entry in po:
25 ... # do something with entry...
26 ... pass
27 >>> # add an entry
28 >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
29 >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
30 >>> po.append(entry)
31 >>> # to save our modified po file:
32 >>> # po.save()
33 >>> # or you may want to compile the po file
34 >>> # po.save_as_mofile('tests/test_utf8.mo')
35 """
36
37
38 __author__ = 'David JEAN LOUIS <izimobil@gmail.com>'
39 __version__ = '0.3.1'
40
41
42
43 try:
44 import struct
45 import textwrap
46 import warnings
47 except ImportError, exc:
48 raise ImportError('polib requires python 2.3 or later with the standard' \
49 ' modules "struct", "textwrap" and "warnings" (details: %s)' % exc)
50
51
# Names exported by ``from polib import *``.
__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
           'detect_encoding', 'quote', 'unquote']


# Module-level aliases of built-in type methods; the rest of the module calls
# them as plain functions, e.g. ``_strjoin(sep, items)``.
_dictget = dict.get
_listappend = list.append
_listpop = list.pop
_strjoin = str.join
_strsplit = str.split
_strstrip = str.strip
_strreplace = str.replace
_textwrap = textwrap.wrap


# Encoding used by ``_BaseEntry._decode`` to encode unicode strings. It is
# reassigned by ``detect_encoding`` and by ``pofile``/``mofile``.
encoding = 'utf-8'
68
def pofile(fpath, wrapwidth=78, autodetect_encoding=True):
    """
    Convenience function that parses the po/pot file *fpath* and returns
    a POFile instance.

    When *autodetect_encoding* is true, the module-level *encoding* is
    replaced by the value returned by ``detect_encoding(fpath)`` before the
    file is parsed. Parsing errors are reported by the parser as IOError.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po/pot file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext, default to 78 (optional)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding

    **Example**:

    >>> import polib
    >>> po = polib.pofile('tests/test_utf8.po')
    >>> po #doctest: +ELLIPSIS
    <POFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
    ...     orig_po = polib.pofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_po.save(tmpf)
    ...     try:
    ...         new_po = polib.pofile(tmpf)
    ...         for old, new in zip(orig_po, new_po):
    ...             if old.msgid != new.msgid:
    ...                 old.msgid
    ...                 new.msgid
    ...             if old.msgstr != new.msgstr:
    ...                 old.msgstr
    ...                 new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    global encoding
    # Plain truthiness test: any true value enables the detection.
    if autodetect_encoding:
        encoding = detect_encoding(fpath)
    parser = _POFileParser(fpath)
    instance = parser.parse()
    # The parser builds the POFile with the default width; apply the
    # caller's width afterwards.
    instance.wrapwidth = wrapwidth
    return instance
112
113
114
def mofile(fpath, wrapwidth=78, autodetect_encoding=True):
    """
    Convenience function that parses the mo file *fpath* and returns
    a MOFile instance.

    When *autodetect_encoding* is true, the module-level *encoding* is
    replaced by the value returned by ``detect_encoding(fpath)`` before the
    file is parsed.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the mo file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext to generate the po file that was used to format
        the mo file, default to 78 (optional)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the mo file encoding

    **Example**:

    >>> import polib
    >>> mo = polib.mofile('tests/test_utf8.mo')
    >>> mo #doctest: +ELLIPSIS
    <MOFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
    ...     orig_mo = polib.mofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_mo.save(tmpf)
    ...     try:
    ...         new_mo = polib.mofile(tmpf)
    ...         for old, new in zip(orig_mo, new_mo):
    ...             if old.msgid != new.msgid:
    ...                 old.msgstr
    ...                 new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    global encoding
    # Plain truthiness test: any true value enables the detection.
    if autodetect_encoding:
        encoding = detect_encoding(fpath)
    parser = _MOFileParser(fpath)
    instance = parser.parse()
    # The parser builds the MOFile with the default width; apply the
    # caller's width afterwards.
    instance.wrapwidth = wrapwidth
    return instance
156
157
158
def detect_encoding(fpath):
    """
    Try to detect the encoding used by the file *fpath*. The function will
    return polib default *encoding* if it's unable to detect it.

    The file is scanned line by line for a ``Content-Type: ... charset=...``
    declaration and the first charset found is returned. As a side effect
    the module-level *encoding* is reset to ``'utf-8'``.

    **Keyword argument**:
      - *fpath*: string, full or relative path to the po or mo file to
        inspect.

    **Examples**:

    >>> print detect_encoding('tests/test_noencoding.po')
    utf-8
    >>> print detect_encoding('tests/test_utf8.po')
    UTF-8
    >>> print detect_encoding('tests/test_utf8.mo')
    UTF-8
    >>> print detect_encoding('tests/test_iso-8859-15.po')
    ISO_8859-15
    >>> print detect_encoding('tests/test_iso-8859-15.mo')
    ISO_8859-15
    """
    import re
    global encoding
    # Reset the module default: it is the value returned when the file does
    # not declare a charset.
    encoding = 'utf-8'
    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
    f = open(fpath)
    try:
        for l in f:
            match = rx.search(l)
            if match:
                return match.group(1).strip()
    finally:
        # Close the handle even when reading the file raises.
        f.close()
    return encoding
196
197
198
def quote(st):
    """
    Quote and return the given string *st*.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their backslash escape sequences. Both byte strings and
    unicode strings are accepted.

    **Examples**:

    >>> quote('\\t and \\n and \\r and " and \\\\')
    '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
    """
    # Backslashes must be escaped first, otherwise the backslashes added by
    # the following replacements would be escaped a second time.
    st = st.replace('\\', r'\\')
    st = st.replace('\t', r'\t')
    st = st.replace('\r', r'\r')
    st = st.replace('\n', r'\n')
    st = st.replace('\"', r'\"')
    return st
215
216
217
def unquote(st):
    """
    Unquote and return the given string *st*.

    The escape sequences ``\\\\t``, ``\\\\n``, ``\\\\r``, ``\\\\"`` and an
    escaped backslash are decoded in a single left-to-right pass, so an
    escaped backslash followed by ``n``, ``r`` or ``t`` is decoded as a
    backslash followed by that letter. Any other backslash is left as is.

    **Examples**:

    >>> unquote('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
    '\\t and \\n and \\r and " and \\\\'
    >>> unquote('\\\\\\\\n')
    '\\\\n'
    """
    import re

    escapes = {'n': '\n', 'r': '\r', 't': '\t', '"': '"', '\\': '\\'}

    def _unescape(match):
        # group(1) is the character following the backslash
        return escapes[match.group(1)]

    # One pass over the string: successive str.replace() calls would
    # re-interpret the second half of an escaped backslash.
    return re.sub(r'\\([\\tnr"])', _unescape, st)
234
235
236
238 """
239 Common parent class for POFile and MOFile classes.
240 This class must **not** be instantiated directly.
241 """
242
243
244
245 - def __init__(self, fpath=None, wrapwidth=78):
246 """
247 Constructor.
248
249 **Keyword arguments**:
250 - *fpath*: string, path to po or mo file
251 - *wrapwidth*: integer, the wrap width, only useful when -w option
252 was passed to xgettext to generate the po file that was used to
253 format the mo file, default to 78 (optional).
254 """
255 list.__init__(self)
256
257 self.fpath = fpath
258
259 self.wrapwidth = wrapwidth
260
261 self.header = ''
262
263 self.metadata = {}
264 self.metadata_is_fuzzy = 0
265
267 """String representation of the file."""
268 ret = []
269 entries = [self.metadata_as_entry()] + self
270 for entry in entries:
271 _listappend(ret, entry.__str__(self.wrapwidth))
272 return _strjoin('\n', ret)
273
275 """Return the official string representation of the object."""
276 return '<%s instance at %d>' % (self.__class__.__name__, id(self))
277
279 """Return the metadata as an entry"""
280 e = POEntry(msgid='')
281 mdata = self.ordered_metadata()
282 if mdata:
283 strs = []
284 for name, value in mdata:
285
286 value = _strjoin('\n', [_strstrip(v)
287 for v in _strsplit(value, '\n')])
288 _listappend(strs, '%s: %s' % (name, value))
289 e.msgstr = _strjoin('\n', strs) + '\n'
290 return e
291
292 - def save(self, fpath=None, repr_method='__str__'):
293 """
294 Save the po file to file *fpath* if no file handle exists for
295 the object. If there's already an open file and no fpath is
296 provided, then the existing file is rewritten with the modified
297 data.
298
299 **Keyword arguments**:
300 - *fpath*: string, full or relative path to the file.
301 - *repr_method*: string, the method to use for output.
302 """
303 if self.fpath is None and fpath is None:
304 raise IOError('You must provide a file path to save() method')
305 contents = getattr(self, repr_method)()
306 if fpath is None:
307 fpath = self.fpath
308 mode = 'w'
309 if repr_method == 'to_binary':
310 mode += 'b'
311 fhandle = open(fpath, mode)
312 fhandle.write(contents)
313 fhandle.close()
314
315 - def find(self, st, by='msgid'):
316 """
317 Find entry which msgid (or property identified by the *by*
318 attribute) matches the string *st*.
319
320 **Examples**:
321
322 >>> po = pofile('tests/test_utf8.po')
323 >>> entry = po.find('Thursday')
324 >>> entry.msgstr
325 'Jueves'
326 >>> entry = po.find('Some unexistant msgid')
327 >>> entry is None
328 True
329 >>> entry = po.find('Jueves', 'msgstr')
330 >>> entry.msgid
331 'Thursday'
332 """
333 try:
334 return [e for e in self if getattr(e, by) == st][0]
335 except IndexError:
336 return None
337
370
372 """Return the mofile binary representation."""
373 import struct
374 import array
375 output = ''
376 offsets = []
377 ids = strs = ''
378 entries = self.translated_entries()
379
380 def cmp(_self, other):
381 if _self.msgid > other.msgid:
382 return 1
383 elif _self.msgid < other.msgid:
384 return -1
385 else:
386 return 0
387 entries.sort(cmp)
388
389 mentry = self.metadata_as_entry()
390 mentry.msgstr = _strreplace(mentry.msgstr, '\\n', '').lstrip() + '\n'
391 entries = [mentry] + entries
392 entries_len = len(entries)
393 for e in entries:
394
395
396 msgid = e._decode(e.msgid)
397 msgstr = e._decode(e.msgstr)
398 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
399 ids += msgid + '\0'
400 strs += msgstr + '\0'
401
402 keystart = 7*4+16*entries_len
403
404 valuestart = keystart + len(ids)
405 koffsets = []
406 voffsets = []
407
408
409 for o1, l1, o2, l2 in offsets:
410 koffsets += [l1, o1+keystart]
411 voffsets += [l2, o2+valuestart]
412 offsets = koffsets + voffsets
413 output = struct.pack("Iiiiiii",
414 0x950412de,
415 0,
416 entries_len,
417 7*4,
418 7*4+entries_len*8,
419 0, 0)
420 output += array.array("i", offsets).tostring()
421 output += ids
422 output += strs
423 return output
424
425
426
class POFile(_BaseFile):
    '''
    Po (or Pot) file reader/writer.
    POFile objects inherit the list objects methods.

    **Example**:

    >>> po = POFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
    >>> entry1.comment = "Some useful comment"
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
    >>> entry2.comment = "Another useful comment"
    >>> entry3 = POEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr=u'Un message unicode avec des quotes " \\"'
    ... )
    >>> entry3.comment = "Test string quoting"
    >>> po.append(entry1)
    >>> po.append(entry2)
    >>> po.append(entry3)
    >>> po.header = "Some Header"
    >>> print po
    # Some Header
    msgid ""
    msgstr ""
    <BLANKLINE>
    #. Some useful comment
    #: testfile:12 another_file:1
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    #. Another useful comment
    #: testfile:15 another_file:5
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    #. Test string quoting
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __str__(self):
        """
        Return the string representation of the po file: the header
        lines written as comments, followed by the entries.
        """
        lines = []
        for header in self.header.split('\n'):
            if header[:1] in [',', ':']:
                # a header line starting with ',' or ':' is glued to the '#'
                lines.append('#%s\n' % header)
            else:
                lines.append('# %s\n' % header)
        return ''.join(lines) + _BaseFile.__str__(self)

    def save_as_mofile(self, fpath):
        """
        Save the binary representation of the file to *fpath*.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method that returns the percentage of translated
        messages, as an integer. Obsolete entries are not counted.
        A file without any non-obsolete entry is reported as 100.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> po.percent_translated()
        50
        """
        total = len([e for e in self if not e.obsolete])
        if total == 0:
            # nothing to translate: avoid dividing by zero
            return 100
        translated = len(self.translated_entries())
        return int((100.00 / float(total)) * translated)

    def translated_entries(self):
        """
        Convenience method that returns a list of translated entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.translated_entries())
        5
        """
        return [e for e in self if e.translated() and not e.obsolete]

    def untranslated_entries(self):
        """
        Convenience method that returns a list of untranslated entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.untranslated_entries())
        5
        """
        return [e for e in self if not e.translated() and not e.obsolete]

    def fuzzy_entries(self):
        """
        Convenience method that returns the list of 'fuzzy' entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.fuzzy_entries())
        2
        """
        return [e for e in self if 'fuzzy' in e.flags]

    def obsolete_entries(self):
        """
        Convenience method that returns the list of obsolete entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.obsolete_entries())
        4
        """
        return [e for e in self if e.obsolete]
564
565
566
class MOFile(_BaseFile):
    '''
    Mo file reader/writer.
    MOFile objects inherit the list objects methods.

    **Example**:

    >>> mo = MOFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> entry3 = MOEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr=u'Un message unicode avec des quotes " \\"'
    ... )
    >>> mo.append(entry1)
    >>> mo.append(entry2)
    >>> mo.append(entry3)
    >>> print mo
    msgid ""
    msgstr ""
    <BLANKLINE>
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __init__(self, fpath=None, wrapwidth=78):
        """
        MOFile constructor.
        See _BaseFile.__init__ for the arguments.
        """
        _BaseFile.__init__(self, fpath, wrapwidth)
        # filled in by the mo file parser
        self.magic_number = None
        self.version = 0

    def save_as_pofile(self, fpath):
        """
        Save the string representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath)

    def save(self, fpath=None):
        """
        Save the binary representation of the file to *fpath*, or to the
        path the object was created with when *fpath* is not given.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file (optional).
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns 100.
        """
        return 100

    def translated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Returns the file itself: every entry is considered translated.
        """
        return self

    def untranslated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns an empty list.
        """
        return []

    def fuzzy_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns an empty list.
        """
        return []

    def obsolete_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns an empty list.
        """
        return []
662
663
664
class _BaseEntry(object):
    """
    Behaviour shared by POEntry and MOEntry objects.
    This class must *not* be instantiated directly.
    """

    def __init__(self, *args, **kwargs):
        """Store the message fields given as keyword arguments."""
        option = kwargs.get
        self.msgid = option('msgid', '')
        self.msgstr = option('msgstr', '')
        self.msgid_plural = option('msgid_plural', '')
        self.msgstr_plural = option('msgstr_plural', {})
        self.obsolete = option('obsolete', False)

    def __repr__(self):
        """Return the class name and the (hexadecimal) identity of the object."""
        class_name = self.__class__.__name__
        return '<%s instance at %x>' % (class_name, id(self))

    def __str__(self, wrapwidth=78):
        """
        Return the msgid/msgstr part of the entry, common to POEntry and
        MOEntry objects. Obsolete entries get a ``#~ `` prefix on each line.
        """
        delflag = ''
        if self.obsolete:
            delflag = '#~ '

        out = self._str_field("msgid", delflag, "", self.msgid)
        if self.msgid_plural:
            out.extend(self._str_field("msgid_plural", delflag, "",
                                       self.msgid_plural))
        if not self.msgstr_plural:
            # no plural forms: a single msgstr
            out.extend(self._str_field("msgstr", delflag, "", self.msgstr))
        else:
            # one msgstr[index] per plural form, in index order
            plurals = self.msgstr_plural
            indexes = list(plurals.keys())
            indexes.sort()
            for index in indexes:
                out.extend(self._str_field("msgstr", delflag,
                                           '[%s]' % index, plurals[index]))
        out.append('')
        return '\n'.join(out)

    def _str_field(self, fieldname, delflag, plural_index, field):
        """
        Return the lines of one field: the keyword line followed by one
        quoted continuation line per line of a multi-line value.
        """
        field = self._decode(field)
        chunks = field.splitlines(True)
        if len(chunks) > 1:
            # multi-line value: empty string on the keyword line
            head, tail = '', chunks
        else:
            # single line (or empty) value: everything on the keyword line
            head, tail = field, []
        out = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
                                quote(head))]
        out.extend(['%s"%s"' % (delflag, quote(chunk)) for chunk in tail])
        return out

    def _decode(self, st):
        """Encode *st* with the module encoding when it is a unicode string."""
        if not isinstance(st, unicode):
            return st
        return st.encode(encoding)
733
734
735
class POEntry(_BaseEntry):
    """
    Represents a po file entry.

    **Examples**:

    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
    >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
    >>> print entry
    #: welcome.py:12 anotherfile.py:34
    msgid "Welcome"
    msgstr "Bienvenue"
    <BLANKLINE>
    >>> entry = POEntry()
    >>> entry.occurrences = [('src/spam.c', 32), ('src/eggs.c', 45)]
    >>> entry.tcomment = 'A plural translation'
    >>> entry.flags.append('c-format')
    >>> entry.msgid = 'I have spam but no egg !'
    >>> entry.msgid_plural = 'I have spam and %d eggs !'
    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
    >>> print entry
    # A plural translation
    #: src/spam.c:32 src/eggs.c:45
    #, c-format
    msgid "I have spam but no egg !"
    msgid_plural "I have spam and %d eggs !"
    msgstr[0] "J'ai du jambon mais aucun oeuf !"
    msgstr[1] "J'ai du jambon et %d oeufs !"
    <BLANKLINE>
    """

    def __init__(self, *args, **kwargs):
        """
        POEntry constructor.

        On top of the _BaseEntry keywords, accepts *comment*, *tcomment*,
        *occurrences* (list of (path, line number) pairs) and *flags*.
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = kwargs.get('comment', '')
        self.tcomment = kwargs.get('tcomment', '')
        self.occurrences = kwargs.get('occurrences', [])
        # deprecated, misspelled keyword: routed through __setattr__, which
        # warns and stores the value as "occurrences"
        if kwargs.get('occurences') is not None:
            self.occurences = kwargs.get('occurences')
        self.flags = kwargs.get('flags', [])

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry: comments,
        occurrences and flags, followed by the message fields.
        Lines longer than *wrapwidth* are wrapped when *wrapwidth* > 0.
        """
        ret = []
        # extracted ("generated") comments
        if self.comment != '':
            ret += self._comment_lines(self.comment, '#. ', wrapwidth)
        # translator comments
        if self.tcomment != '':
            ret += self._comment_lines(self.tcomment, '# ', wrapwidth)
        # occurrences, with possible line breaks
        if self.occurrences:
            filelist = []
            for fpath, lineno in self.occurrences:
                filelist.append('%s:%s' % (self._decode(fpath), lineno))
            filestr = ' '.join(filelist)
            if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
                # textwrap also breaks lines on hyphens, which is wrong for
                # file names: hide the hyphens behind '*' while wrapping and
                # restore them afterwards (a real '*' in a path would be
                # turned into '-' by this trick)
                lines = _textwrap(filestr.replace('-', '*'), wrapwidth,
                                  initial_indent='#: ',
                                  subsequent_indent='#: ',
                                  break_long_words=False)
                for line in lines:
                    ret.append(line.replace('*', '-'))
            else:
                ret.append('#: '+filestr)
        # flags
        if self.flags:
            ret.append('#, %s' % ', '.join(self.flags))
        ret.append(_BaseEntry.__str__(self, wrapwidth))
        return '\n'.join(ret)

    def _comment_lines(self, text, prefix, wrapwidth):
        """
        Return *text* as a list of comment lines starting with *prefix*,
        wrapping each line that does not fit in *wrapwidth*.
        """
        lines = []
        for line in self._decode(text).split('\n'):
            if wrapwidth > 0 and len(line) > wrapwidth-len(prefix):
                # extend (not append): textwrap returns a list of lines
                lines += _textwrap(line, wrapwidth,
                                   initial_indent=prefix,
                                   subsequent_indent=prefix,
                                   break_long_words=False)
            else:
                lines.append('%s%s' % (prefix, line))
        return lines

    def translated(self):
        """Return True if the entry has been translated or False"""
        return ((self.msgstr != '' or self.msgstr_plural) and \
                (not self.obsolete and 'fuzzy' not in self.flags))

    def __getattr__(self, name):
        """
        Called only when the normal attribute lookup fails: serve the
        deprecated "occurences" spelling, raise AttributeError otherwise.
        """
        if name == 'occurences':
            warnings.warn(
                '"occurences" property is deprecated (it was a typo), '\
                'please use "occurrences" instead'
            )
            return self.occurrences
        raise AttributeError("'%s' object has no attribute '%s'" %
                             (self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """Redirect the deprecated "occurences" spelling to "occurrences"."""
        if name == 'occurences':
            warnings.warn(
                '"occurences" property is deprecated (it was a typo), '\
                'please use "occurrences" instead'
            )
            self.occurrences = value
        else:
            object.__setattr__(self, name, value)
862
863
864
865
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print entry
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry, which is exactly
        the one built by the base class.
        """
        render = _BaseEntry.__str__
        return render(self, wrapwidth)
887
888
889
class _POFileParser(object):
    """
    A finite state machine to parse efficiently and correctly po
    file format.
    """

    def __init__(self, fpath):
        """
        Constructor.

        **Keyword argument**:
          - *fpath*: string, path to the po file
        """
        self.fhandle = open(fpath, 'r')
        self.instance = POFile(fpath=fpath)
        self.transitions = {}
        self.current_entry = POEntry()
        self.current_state = 'ST'
        self.current_token = None
        # index of the msgstr plural form being read, and obsolete marker of
        # the line being read
        self.msgstr_index = 0
        self.entry_obsolete = 0
        # Two-letter codes name both the symbols (kind of line) and the
        # states: ST is the initial state, HE header comment, TC translator
        # comment, GC generated comment, OC occurrences, FL flags, MI msgid,
        # MP msgid plural, MS msgstr, MX msgstr plural, MC continuation line.
        all_ = ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI']

        self.add('TC', ['ST', 'HE'], 'HE')
        self.add('TC', ['GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'], 'TC')
        self.add('GC', all_, 'GC')
        self.add('OC', all_, 'OC')
        self.add('FL', all_, 'FL')
        self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MX'], 'MI')
        self.add('MP', ['TC', 'GC', 'MI'], 'MP')
        self.add('MS', ['MI', 'MP', 'TC'], 'MS')
        self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
        self.add('MC', ['MI', 'MP', 'MS', 'MX'], 'MC')

    def parse(self):
        """
        Run the state machine, parse the file line by line and call process()
        with the current matched symbol.

        Returns the POFile instance. Raises IOError on a syntax error; the
        file handle is closed in every case.
        """
        try:
            linenum = 0
            for line in self.fhandle:
                linenum += 1
                line = line.strip()
                if line == '':
                    continue
                if line[:3] == '#~ ':
                    # obsolete entry: parse the rest of the line as usual
                    line = line[3:]
                    self.entry_obsolete = 1
                else:
                    self.entry_obsolete = 0
                self.current_token = line
                symbol = self._symbol_for(line)
                if symbol is not None:
                    self.process(symbol, linenum)

            # the last entry read has not been stored by any handler yet
            if self.current_entry:
                self.instance.append(self.current_entry)

            # an entry with an empty msgid in first position holds the
            # metadata of the file
            firstentry = self.instance[0]
            if firstentry.msgid == '':
                firstentry = self.instance.pop(0)
                self.instance.metadata_is_fuzzy = firstentry.flags
                key = None
                for msg in firstentry.msgstr.splitlines():
                    tokens = msg.split(':', 1)
                    if len(tokens) == 2:
                        key = tokens[0]
                        self.instance.metadata[key] = tokens[1].strip()
                    elif key is not None:
                        # line without a colon: continuation of the
                        # previous metadata value
                        self.instance.metadata[key] += '\n' + msg.strip()
        finally:
            self.fhandle.close()
        return self.instance

    def _symbol_for(self, line):
        """
        Return the symbol matching the (stripped) *line*, or None when the
        line is not recognised. The order of the tests matters.
        """
        if line[:2] == '#:':
            return 'OC'
        if line[:7] == 'msgid "':
            return 'MI'
        if line[:8] == 'msgstr "':
            return 'MS'
        if line[:1] == '"':
            return 'MC'
        if line[:14] == 'msgid_plural "':
            return 'MP'
        if line[:7] == 'msgstr[':
            return 'MX'
        if line[:3] == '#, ':
            return 'FL'
        if line[:2] == '# ' or line == '#':
            return 'TC'
        if line[:2] == '#.':
            return 'GC'
        return None

    def add(self, symbol, states, next_state):
        """
        Add a transition to the state machine.
        Keywords arguments:

        symbol     -- string, the matched token (two chars symbol)
        states     -- list, a list of states (two chars symbols)
        next_state -- the next state the fsm will have after the action
        """
        # the handler is named after the target state, e.g. handle_mi
        action = getattr(self, 'handle_%s' % next_state.lower())
        for state in states:
            self.transitions[(symbol, state)] = (action, next_state)

    def process(self, symbol, linenum):
        """
        Process the transition corresponding to the current state and the
        symbol provided.

        Keywords arguments:
        symbol  -- string, the matched token (two chars symbol)
        linenum -- integer, the current line number of the parsed file

        Any error (unknown transition included) is reported as IOError.
        """
        try:
            (action, state) = self.transitions[(symbol, self.current_state)]
            # a handler returning a false value leaves the state unchanged
            if action():
                self.current_state = state
        except Exception:
            # sys.exc_info() reads the exception with a syntax accepted by
            # both Python 2 and Python 3
            import sys
            exc = sys.exc_info()[1]
            raise IOError('Syntax error in po file (line %s): %s' % \
                (linenum, exc))

    def _flush_entry(self):
        """
        Store the entry being built and start a new one when the previous
        line belonged to the msgstr of that entry.
        """
        if self.current_state in ['MC', 'MS', 'MX']:
            self.instance.append(self.current_entry)
            self.current_entry = POEntry()

    # state handlers

    def handle_he(self):
        """Handle a header comment."""
        if self.instance.header != '':
            self.instance.header += '\n'
        self.instance.header += self.current_token[2:]
        return True

    def handle_tc(self):
        """Handle a translator comment."""
        self._flush_entry()
        if self.current_entry.tcomment != '':
            self.current_entry.tcomment += '\n'
        self.current_entry.tcomment += self.current_token[2:]
        return True

    def handle_gc(self):
        """Handle a generated comment."""
        self._flush_entry()
        if self.current_entry.comment != '':
            self.current_entry.comment += '\n'
        self.current_entry.comment += self.current_token[3:]
        return True

    def handle_oc(self):
        """Handle a file:num occurence."""
        self._flush_entry()
        for occurrence in self.current_token[3:].split():
            if occurrence != '':
                fil, line = occurrence.split(':')
                self.current_entry.occurrences.append((fil, line))
        return True

    def handle_fl(self):
        """Handle a flags line."""
        self._flush_entry()
        self.current_entry.flags += self.current_token[3:].split(', ')
        return True

    def handle_mi(self):
        """Handle a msgid."""
        self._flush_entry()
        self.current_entry.obsolete = self.entry_obsolete
        self.current_entry.msgid = unquote(self.current_token[7:-1])
        return True

    def handle_mp(self):
        """Handle a msgid plural."""
        self.current_entry.msgid_plural = unquote(self.current_token[14:-1])
        return True

    def handle_ms(self):
        """Handle a msgstr."""
        self.current_entry.msgstr = unquote(self.current_token[8:-1])
        return True

    def handle_mx(self):
        """Handle a msgstr plural."""
        # the token looks like: msgstr[N] "..." with a one-character index
        index, value = self.current_token[7], self.current_token[11:-1]
        self.current_entry.msgstr_plural[index] = unquote(value)
        self.msgstr_index = index
        return True

    def handle_mc(self):
        """Handle a msgid or msgstr continuation line."""
        text = unquote(self.current_token[1:-1])
        if self.current_state == 'MI':
            self.current_entry.msgid += text
        elif self.current_state == 'MP':
            self.current_entry.msgid_plural += text
        elif self.current_state == 'MS':
            self.current_entry.msgstr += text
        elif self.current_state == 'MX':
            plurals = self.current_entry.msgstr_plural
            plurals[self.msgstr_index] = plurals[self.msgstr_index] + text
        # a continuation line never changes the current state
        return False
1128
1129
1130
class _MOFileParser(object):
    """
    A class to parse binary mo files.
    """

    def __init__(self, fpath):
        """_MOFileParser constructor."""
        self.fhandle = open(fpath, 'rb')
        self.instance = MOFile(fpath)

    def parse_magicnumber(self):
        """
        Parse the magic number and raise an exception if not valid.

        The byte order in which the magic number is stored is remembered
        and used to decode every integer read afterwards.
        """
        magic_number = self._readbinary(fmt='4s')
        # struct.pack() yields the native byte-string type on every Python
        # version, which keeps the comparison below meaningful
        if magic_number == struct.pack('<I', 0x950412de):
            self._byteorder = '<'
        elif magic_number == struct.pack('>I', 0x950412de):
            self._byteorder = '>'
        else:
            raise IOError('Invalid mo file, magic number is incorrect !')
        self.instance.magic_number = magic_number

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Returns the MOFile instance. Raises IOError when the magic number
        is not valid; the file handle is closed in every case.
        """
        try:
            self.parse_magicnumber()
            # Explicit byte order and standard sizes: each header field is
            # a 4-byte integer, while native 'L' and 'P' are 8 bytes wide on
            # LP64 platforms.
            uint = self._byteorder + 'I'
            pair = self._byteorder + 'II'
            # header: version, number of strings, offsets of the two tables
            self.instance.version = self._readbinary(uint)
            numofstrings = self._readbinary(uint)
            msgids_hash_offset = self._readbinary(uint)
            msgstrs_hash_offset = self._readbinary(uint)
            # tables of (length, offset) pairs
            self.fhandle.seek(msgids_hash_offset)
            msgids_index = []
            for i in range(numofstrings):
                msgids_index.append(self._readbinary(pair))
            self.fhandle.seek(msgstrs_hash_offset)
            msgstrs_index = []
            for i in range(numofstrings):
                msgstrs_index.append(self._readbinary(pair))
            # the strings themselves
            for i in range(numofstrings):
                self.fhandle.seek(msgids_index[i][1])
                msgid = self.fhandle.read(msgids_index[i][0])
                self.fhandle.seek(msgstrs_index[i][1])
                msgstr = self.fhandle.read(msgstrs_index[i][0])
                if i == 0:
                    # the first string pair holds the metadata
                    metadata = {}
                    for line in msgstr.split('\n'):
                        tokens = line.split(':', 1)
                        if tokens[0] != '':
                            if len(tokens) > 1:
                                metadata[tokens[0]] = tokens[1].strip()
                            else:
                                metadata[tokens[0]] = ''
                    self.instance.metadata = metadata
                    continue
                self.instance.append(MOEntry(msgid=msgid, msgstr=msgstr))
        finally:
            self.fhandle.close()
        return self.instance

    # NOTE(review): the default value of *fmt* is not visible in the
    # reviewed listing; 'c' is assumed. Every call in this class passes
    # *fmt* explicitly.
    def _readbinary(self, fmt='c'):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.
        """
        numbytes = struct.calcsize(fmt)
        data = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, data)
        if len(tup) == 1:
            return tup[0]
        return tup
1209
1210
1211
if __name__ == '__main__':
    """
    **Main function**::
      - to **test** the module just run: *python polib.py [-v]*
      - to **profile** the module: *python polib.py -p <some_pofile.po>*
    """
    import sys
    if len(sys.argv) > 2 and sys.argv[1] == '-p':
        def test(f):
            # load the file with the parser matching its extension, then
            # render it, so that both parsing and output get profiled
            if f.endswith('po'):
                p = pofile(f)
            else:
                p = mofile(f)
            s = str(p)
        import profile
        # Hand the path over as a variable rather than pasting it into the
        # profiled statement: quotes or backslashes in the path would
        # otherwise be read as Python source.
        profiler = profile.Profile()
        profiler.runctx('test(fpath)', {'test': test, 'fpath': sys.argv[2]}, {})
        profiler.print_stats()
    else:
        import doctest
        doctest.testmod()
1232
1233