mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-25 15:52:25 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			1807 lines
		
	
	
		
			60 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			1807 lines
		
	
	
		
			60 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #
 | |
| # License: MIT (see LICENSE file provided)
 | |
| # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
 | |
| 
 | |
| """
 | |
| **polib** allows you to manipulate, create, modify gettext files (pot, po and
 | |
| mo files).  You can load existing files, iterate through it's entries, add,
 | |
| modify entries, comments or metadata, etc. or create new po files from scratch.
 | |
| 
 | |
| **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
 | |
| :func:`~polib.mofile` convenience functions.
 | |
| """
 | |
| 
 | |
| import array
 | |
| import codecs
 | |
| import os
 | |
| import re
 | |
| import struct
 | |
| import sys
 | |
| import textwrap
 | |
| import io
 | |
| 
 | |
| 
 | |
| __author__ = 'David Jean Louis <izimobil@gmail.com>'
 | |
| __version__ = '1.2.0'
 | |
| __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
 | |
|            'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
 | |
| 
 | |
| 
 | |
| # the default encoding to use when encoding cannot be detected
 | |
| default_encoding = 'utf-8'
 | |
| 
 | |
| 
 | |
| # python 2/3 compatibility helpers {{{
 | |
| PY3 = True
 | |
| text_type = str
 | |
| 
 | |
| def b(s):
 | |
|     return s.encode()
 | |
| 
 | |
| def u(s):
 | |
|     return s
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # _pofile_or_mofile {{{
 | |
| def _pofile_or_mofile(f, type, **kwargs):
 | |
|     """
 | |
|     Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
 | |
|     honor the DRY concept.
 | |
|     """
 | |
|     # get the file encoding
 | |
|     enc = kwargs.get('encoding')
 | |
|     if enc is None:
 | |
|         enc = detect_encoding(f, type == 'mofile')
 | |
| 
 | |
|     # parse the file
 | |
|     kls = type == 'pofile' and _POFileParser or _MOFileParser
 | |
|     parser = kls(
 | |
|         f,
 | |
|         encoding=enc,
 | |
|         check_for_duplicates=kwargs.get('check_for_duplicates', False),
 | |
|         klass=kwargs.get('klass')
 | |
|     )
 | |
|     instance = parser.parse()
 | |
|     instance.wrapwidth = kwargs.get('wrapwidth', 78)
 | |
|     return instance
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # _is_file {{{
 | |
| def _is_file(filename_or_contents):
 | |
|     """
 | |
|     Safely returns the value of os.path.exists(filename_or_contents).
 | |
| 
 | |
|     Arguments:
 | |
| 
 | |
|     ``filename_or_contents``
 | |
|         either a filename, or a string holding the contents of some file.
 | |
|         In the latter case, this function will always return False.
 | |
|     """
 | |
|     try:
 | |
|         return os.path.isfile(filename_or_contents)
 | |
|     except (TypeError, ValueError, UnicodeEncodeError):
 | |
|         return False
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # function pofile() {{{
 | |
| def pofile(pofile, **kwargs):
 | |
|     """
 | |
|     Convenience function that parses the po or pot file ``pofile`` and returns
 | |
|     a :class:`~polib.POFile` instance.
 | |
| 
 | |
|     Arguments:
 | |
| 
 | |
|     ``pofile``
 | |
|         string, full or relative path to the po/pot file or its content (data).
 | |
| 
 | |
|     ``wrapwidth``
 | |
|         integer, the wrap width, only useful when the ``-w`` option was passed
 | |
|         to xgettext (optional, default: ``78``).
 | |
| 
 | |
|     ``encoding``
 | |
|         string, the encoding to use (e.g. "utf-8") (default: ``None``, the
 | |
|         encoding will be auto-detected).
 | |
| 
 | |
|     ``check_for_duplicates``
 | |
|         whether to check for duplicate entries when adding entries to the
 | |
|         file (optional, default: ``False``).
 | |
| 
 | |
|     ``klass``
 | |
|         class which is used to instantiate the return value (optional,
 | |
|         default: ``None``, the return value with be a :class:`~polib.POFile`
 | |
|         instance).
 | |
|     """
 | |
|     return _pofile_or_mofile(pofile, 'pofile', **kwargs)
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # function mofile() {{{
 | |
| def mofile(mofile, **kwargs):
 | |
|     """
 | |
|     Convenience function that parses the mo file ``mofile`` and returns a
 | |
|     :class:`~polib.MOFile` instance.
 | |
| 
 | |
|     Arguments:
 | |
| 
 | |
|     ``mofile``
 | |
|         string, full or relative path to the mo file or its content (string
 | |
|         or bytes).
 | |
| 
 | |
|     ``wrapwidth``
 | |
|         integer, the wrap width, only useful when the ``-w`` option was passed
 | |
|         to xgettext to generate the po file that was used to format the mo file
 | |
|         (optional, default: ``78``).
 | |
| 
 | |
|     ``encoding``
 | |
|         string, the encoding to use (e.g. "utf-8") (default: ``None``, the
 | |
|         encoding will be auto-detected).
 | |
| 
 | |
|     ``check_for_duplicates``
 | |
|         whether to check for duplicate entries when adding entries to the
 | |
|         file (optional, default: ``False``).
 | |
| 
 | |
|     ``klass``
 | |
|         class which is used to instantiate the return value (optional,
 | |
|         default: ``None``, the return value with be a :class:`~polib.POFile`
 | |
|         instance).
 | |
|     """
 | |
|     return _pofile_or_mofile(mofile, 'mofile', **kwargs)
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # function detect_encoding() {{{
 | |
| def detect_encoding(file, binary_mode=False):
 | |
|     """
 | |
|     Try to detect the encoding used by the ``file``. The ``file`` argument can
 | |
|     be a PO or MO file path or a string containing the contents of the file.
 | |
|     If the encoding cannot be detected, the function will return the value of
 | |
|     ``default_encoding``.
 | |
| 
 | |
|     Arguments:
 | |
| 
 | |
|     ``file``
 | |
|         string, full or relative path to the po/mo file or its content.
 | |
| 
 | |
|     ``binary_mode``
 | |
|         boolean, set this to True if ``file`` is a mo file.
 | |
|     """
 | |
|     PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
 | |
|     rxt = re.compile(u(PATTERN))
 | |
|     rxb = re.compile(b(PATTERN))
 | |
| 
 | |
|     def charset_exists(charset):
 | |
|         """Check whether ``charset`` is valid or not."""
 | |
|         try:
 | |
|             codecs.lookup(charset)
 | |
|         except LookupError:
 | |
|             return False
 | |
|         return True
 | |
| 
 | |
|     if not _is_file(file):
 | |
|         try:
 | |
|             match = rxt.search(file)
 | |
|         except TypeError:
 | |
|             match = rxb.search(file)
 | |
|         if match:
 | |
|             enc = match.group(1).strip()
 | |
|             if not isinstance(enc, text_type):
 | |
|                 enc = enc.decode('utf-8')
 | |
|             if charset_exists(enc):
 | |
|                 return enc
 | |
|     else:
 | |
|         # For PY3, always treat as binary
 | |
|         if binary_mode or PY3:
 | |
|             mode = 'rb'
 | |
|             rx = rxb
 | |
|         else:
 | |
|             mode = 'r'
 | |
|             rx = rxt
 | |
|         with open(file, mode) as f:
 | |
|             for line in f.readlines():
 | |
|                 match = rx.search(line)
 | |
|                 if match:
 | |
|                     f.close()
 | |
|                     enc = match.group(1).strip()
 | |
|                     if not isinstance(enc, text_type):
 | |
|                         enc = enc.decode('utf-8')
 | |
|                     if charset_exists(enc):
 | |
|                         return enc
 | |
|     return default_encoding
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # function escape() {{{
 | |
| def escape(st):
 | |
|     """
 | |
|     Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r``, ``\\v``,
 | |
|     ``\\b``, ``\\f`` and ``"`` in the given string ``st`` and returns it.
 | |
|     """
 | |
|     return st.replace('\\', r'\\')\
 | |
|              .replace('\t', r'\t')\
 | |
|              .replace('\r', r'\r')\
 | |
|              .replace('\n', r'\n')\
 | |
|              .replace('\v', r'\v')\
 | |
|              .replace('\b', r'\b')\
 | |
|              .replace('\f', r'\f')\
 | |
|              .replace('\"', r'\"')
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # function unescape() {{{
 | |
| def unescape(st):
 | |
|     """
 | |
|     Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r``, ``\\v``,
 | |
|     ``\\b``, ``\\f`` and ``"`` in the given string ``st`` and returns it.
 | |
|     """
 | |
|     def unescape_repl(m):
 | |
|         m = m.group(1)
 | |
|         if m == 'n':
 | |
|             return '\n'
 | |
|         if m == 't':
 | |
|             return '\t'
 | |
|         if m == 'r':
 | |
|             return '\r'
 | |
|         if m == 'v':
 | |
|             return '\v'
 | |
|         if m == 'b':
 | |
|             return '\b'
 | |
|         if m == 'f':
 | |
|             return '\f'
 | |
|         if m == '\\':
 | |
|             return '\\'
 | |
|         return m  # handles escaped double quote
 | |
|     return re.sub(r'\\(\\|n|t|r|v|b|f|")', unescape_repl, st)
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # function natural_sort() {{{
 | |
| def natural_sort(lst):
 | |
|     """
 | |
|     Sort naturally the given list.
 | |
|     Credits: http://stackoverflow.com/a/4836734
 | |
|     """
 | |
|     def convert(text):
 | |
|         return int(text) if text.isdigit() else text.lower()
 | |
| 
 | |
|     def alphanum_key(key):
 | |
|         return [convert(c) for c in re.split('([0-9]+)', key)]
 | |
| 
 | |
|     return sorted(lst, key=alphanum_key)
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # class _BaseFile {{{
 | |
| class _BaseFile(list):
 | |
|     """
 | |
|     Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
 | |
|     classes. This class should **not** be instantiated directly.
 | |
|     """
 | |
| 
 | |
|     def __init__(self, *args, **kwargs):
 | |
|         """
 | |
|         Constructor, accepts the following keyword arguments:
 | |
| 
 | |
|         ``pofile``
 | |
|             string, the path to the po or mo file, or its content as a string.
 | |
| 
 | |
|         ``wrapwidth``
 | |
|             integer, the wrap width, only useful when the ``-w`` option was
 | |
|             passed to xgettext (optional, default: ``78``).
 | |
| 
 | |
|         ``encoding``
 | |
|             string, the encoding to use, defaults to ``default_encoding``
 | |
|             global variable (optional).
 | |
| 
 | |
|         ``check_for_duplicates``
 | |
|             whether to check for duplicate entries when adding entries to the
 | |
|             file, (optional, default: ``False``).
 | |
|         """
 | |
|         list.__init__(self)
 | |
|         # the opened file handle
 | |
|         pofile = kwargs.get('pofile', None)
 | |
|         if pofile and _is_file(pofile):
 | |
|             self.fpath = pofile
 | |
|         else:
 | |
|             self.fpath = kwargs.get('fpath')
 | |
|         # the width at which lines should be wrapped
 | |
|         self.wrapwidth = kwargs.get('wrapwidth', 78)
 | |
|         # the file encoding
 | |
|         self.encoding = kwargs.get('encoding', default_encoding)
 | |
|         # whether to check for duplicate entries or not
 | |
|         self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
 | |
|         # header
 | |
|         self.header = ''
 | |
|         # both po and mo files have metadata
 | |
|         self.metadata = {}
 | |
|         self.metadata_is_fuzzy = 0
 | |
| 
 | |
|     def __unicode__(self):
 | |
|         """
 | |
|         Returns the unicode representation of the file.
 | |
|         """
 | |
|         ret = []
 | |
|         entries = [self.metadata_as_entry()] + \
 | |
|                   [e for e in self if not e.obsolete]
 | |
|         for entry in entries:
 | |
|             ret.append(entry.__unicode__(self.wrapwidth))
 | |
|         for entry in self.obsolete_entries():
 | |
|             ret.append(entry.__unicode__(self.wrapwidth))
 | |
|         ret = u('\n').join(ret)
 | |
|         return ret
 | |
| 
 | |
|     if PY3:
 | |
|         def __str__(self):
 | |
|             return self.__unicode__()
 | |
|     else:
 | |
|         def __str__(self):
 | |
|             """
 | |
|             Returns the string representation of the file.
 | |
|             """
 | |
|             return unicode(self).encode(self.encoding)
 | |
| 
 | |
|     def __contains__(self, entry):
 | |
|         """
 | |
|         Overridden ``list`` method to implement the membership test (in and
 | |
|         not in).
 | |
|         The method considers that an entry is in the file if it finds an entry
 | |
|         that has the same msgid (the test is **case sensitive**) and the same
 | |
|         msgctxt (or none for both entries).
 | |
| 
 | |
|         Argument:
 | |
| 
 | |
|         ``entry``
 | |
|             an instance of :class:`~polib._BaseEntry`.
 | |
|         """
 | |
|         return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
 | |
|             is not None
 | |
| 
 | |
|     def __eq__(self, other):
 | |
|         return str(self) == str(other)
 | |
| 
 | |
|     def append(self, entry):
 | |
|         """
 | |
|         Overridden method to check for duplicates entries, if a user tries to
 | |
|         add an entry that is already in the file, the method will raise a
 | |
|         ``ValueError`` exception.
 | |
| 
 | |
|         Argument:
 | |
| 
 | |
|         ``entry``
 | |
|             an instance of :class:`~polib._BaseEntry`.
 | |
|         """
 | |
|         # check_for_duplicates may not be defined (yet) when unpickling.
 | |
|         # But if pickling, we never want to check for duplicates anyway.
 | |
|         if getattr(self, 'check_for_duplicates', False) and entry in self:
 | |
|             raise ValueError('Entry "%s" already exists' % entry.msgid)
 | |
|         super().append(entry)
 | |
| 
 | |
|     def insert(self, index, entry):
 | |
|         """
 | |
|         Overridden method to check for duplicates entries, if a user tries to
 | |
|         add an entry that is already in the file, the method will raise a
 | |
|         ``ValueError`` exception.
 | |
| 
 | |
|         Arguments:
 | |
| 
 | |
|         ``index``
 | |
|             index at which the entry should be inserted.
 | |
| 
 | |
|         ``entry``
 | |
|             an instance of :class:`~polib._BaseEntry`.
 | |
|         """
 | |
|         if self.check_for_duplicates and entry in self:
 | |
|             raise ValueError('Entry "%s" already exists' % entry.msgid)
 | |
|         super().insert(index, entry)
 | |
| 
 | |
|     def metadata_as_entry(self):
 | |
|         """
 | |
|         Returns the file metadata as a :class:`~polib.POFile` instance.
 | |
|         """
 | |
|         e = POEntry(msgid='')
 | |
|         mdata = self.ordered_metadata()
 | |
|         if mdata:
 | |
|             strs = []
 | |
|             for name, value in mdata:
 | |
|                 # Strip whitespace off each line in a multi-line entry
 | |
|                 strs.append(f'{name}: {value}')
 | |
|             e.msgstr = '\n'.join(strs) + '\n'
 | |
|         if self.metadata_is_fuzzy:
 | |
|             e.flags.append('fuzzy')
 | |
|         return e
 | |
| 
 | |
|     def save(self, fpath=None, repr_method='__unicode__', newline=None):
 | |
|         """
 | |
|         Saves the po file to ``fpath``.
 | |
|         If it is an existing file and no ``fpath`` is provided, then the
 | |
|         existing file is rewritten with the modified data.
 | |
| 
 | |
|         Keyword arguments:
 | |
| 
 | |
|         ``fpath``
 | |
|             string, full or relative path to the file.
 | |
| 
 | |
|         ``repr_method``
 | |
|             string, the method to use for output.
 | |
| 
 | |
|         ``newline``
 | |
|             string, controls how universal newlines works
 | |
|         """
 | |
|         if self.fpath is None and fpath is None:
 | |
|             raise OSError('You must provide a file path to save() method')
 | |
|         contents = getattr(self, repr_method)()
 | |
|         if fpath is None:
 | |
|             fpath = self.fpath
 | |
|         if repr_method == 'to_binary':
 | |
|             with open(fpath, 'wb') as fhandle:
 | |
|                 fhandle.write(contents)
 | |
|         else:
 | |
|             with open(
 | |
|                 fpath,
 | |
|                 'w',
 | |
|                 encoding=self.encoding,
 | |
|                 newline=newline
 | |
|             ) as fhandle:
 | |
|                 if not isinstance(contents, text_type):
 | |
|                     contents = contents.decode(self.encoding)
 | |
|                 fhandle.write(contents)
 | |
| 
 | |
|         # set the file path if not set
 | |
|         if self.fpath is None and fpath:
 | |
|             self.fpath = fpath
 | |
| 
 | |
|     def find(self, st, by='msgid', include_obsolete_entries=False,
 | |
|              msgctxt=False):
 | |
|         """
 | |
|         Find the entry which msgid (or property identified by the ``by``
 | |
|         argument) matches the string ``st``.
 | |
| 
 | |
|         Keyword arguments:
 | |
| 
 | |
|         ``st``
 | |
|             string, the string to search for.
 | |
| 
 | |
|         ``by``
 | |
|             string, the property to use for comparison (default: ``msgid``).
 | |
| 
 | |
|         ``include_obsolete_entries``
 | |
|             boolean, whether to also search in entries that are obsolete.
 | |
| 
 | |
|         ``msgctxt``
 | |
|             string, allows specifying a specific message context for the
 | |
|             search.
 | |
|         """
 | |
|         if include_obsolete_entries:
 | |
|             entries = self[:]
 | |
|         else:
 | |
|             entries = [e for e in self if not e.obsolete]
 | |
|         matches = []
 | |
|         for e in entries:
 | |
|             if getattr(e, by) == st:
 | |
|                 if msgctxt is not False and e.msgctxt != msgctxt:
 | |
|                     continue
 | |
|                 matches.append(e)
 | |
|         if len(matches) == 1:
 | |
|             return matches[0]
 | |
|         elif len(matches) > 1:
 | |
|             if not msgctxt:
 | |
|                 # find the entry with no msgctx
 | |
|                 e = None
 | |
|                 for m in matches:
 | |
|                     if not m.msgctxt:
 | |
|                         e = m
 | |
|                 if e:
 | |
|                     return e
 | |
|                 # fallback to the first entry found
 | |
|                 return matches[0]
 | |
|         return None
 | |
| 
 | |
|     def ordered_metadata(self):
 | |
|         """
 | |
|         Convenience method that returns an ordered version of the metadata
 | |
|         dictionary. The return value is list of tuples (metadata name,
 | |
|         metadata_value).
 | |
|         """
 | |
|         # copy the dict first
 | |
|         metadata = self.metadata.copy()
 | |
|         data_order = [
 | |
|             'Project-Id-Version',
 | |
|             'Report-Msgid-Bugs-To',
 | |
|             'POT-Creation-Date',
 | |
|             'PO-Revision-Date',
 | |
|             'Last-Translator',
 | |
|             'Language-Team',
 | |
|             'Language',
 | |
|             'MIME-Version',
 | |
|             'Content-Type',
 | |
|             'Content-Transfer-Encoding',
 | |
|             'Plural-Forms'
 | |
|         ]
 | |
|         ordered_data = []
 | |
|         for data in data_order:
 | |
|             try:
 | |
|                 value = metadata.pop(data)
 | |
|                 ordered_data.append((data, value))
 | |
|             except KeyError:
 | |
|                 pass
 | |
|         # the rest of the metadata will be alphabetically ordered since there
 | |
|         # are no specs for this AFAIK
 | |
|         for data in natural_sort(metadata.keys()):
 | |
|             value = metadata[data]
 | |
|             ordered_data.append((data, value))
 | |
|         return ordered_data
 | |
| 
 | |
|     def to_binary(self):
 | |
|         """
 | |
|         Return the binary representation of the file.
 | |
|         """
 | |
|         offsets = []
 | |
|         entries = self.translated_entries()
 | |
| 
 | |
|         # the keys are sorted in the .mo file
 | |
|         def cmp(_self, other):
 | |
|             # msgfmt compares entries with msgctxt if it exists
 | |
|             self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
 | |
|             other_msgid = other.msgctxt and other.msgctxt or other.msgid
 | |
|             if self_msgid > other_msgid:
 | |
|                 return 1
 | |
|             elif self_msgid < other_msgid:
 | |
|                 return -1
 | |
|             else:
 | |
|                 return 0
 | |
|         # add metadata entry
 | |
|         entries.sort(key=lambda o: o.msgid_with_context.encode('utf-8'))
 | |
|         mentry = self.metadata_as_entry()
 | |
|         entries = [mentry] + entries
 | |
|         entries_len = len(entries)
 | |
|         ids, strs = b(''), b('')
 | |
|         for e in entries:
 | |
|             # For each string, we need size and file offset.  Each string is
 | |
|             # NUL terminated; the NUL does not count into the size.
 | |
|             msgid = b('')
 | |
|             if e.msgctxt:
 | |
|                 # Contexts are stored by storing the concatenation of the
 | |
|                 # context, a <EOT> byte, and the original string
 | |
|                 msgid = self._encode(e.msgctxt + '\4')
 | |
|             if e.msgid_plural:
 | |
|                 msgstr = []
 | |
|                 for index in sorted(e.msgstr_plural.keys()):
 | |
|                     msgstr.append(e.msgstr_plural[index])
 | |
|                 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
 | |
|                 msgstr = self._encode('\0'.join(msgstr))
 | |
|             else:
 | |
|                 msgid += self._encode(e.msgid)
 | |
|                 msgstr = self._encode(e.msgstr)
 | |
|             offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
 | |
|             ids += msgid + b('\0')
 | |
|             strs += msgstr + b('\0')
 | |
| 
 | |
|         # The header is 7 32-bit unsigned integers.
 | |
|         keystart = 7 * 4 + 16 * entries_len
 | |
|         # and the values start after the keys
 | |
|         valuestart = keystart + len(ids)
 | |
|         koffsets = []
 | |
|         voffsets = []
 | |
|         # The string table first has the list of keys, then the list of values.
 | |
|         # Each entry has first the size of the string, then the file offset.
 | |
|         for o1, l1, o2, l2 in offsets:
 | |
|             koffsets += [l1, o1 + keystart]
 | |
|             voffsets += [l2, o2 + valuestart]
 | |
|         offsets = koffsets + voffsets
 | |
| 
 | |
|         output = struct.pack(
 | |
|             "Iiiiiii",
 | |
|             # Magic number
 | |
|             MOFile.MAGIC,
 | |
|             # Version
 | |
|             0,
 | |
|             # number of entries
 | |
|             entries_len,
 | |
|             # start of key index
 | |
|             7 * 4,
 | |
|             # start of value index
 | |
|             7 * 4 + entries_len * 8,
 | |
|             # size and offset of hash table, we don't use hash tables
 | |
|             0, keystart
 | |
| 
 | |
|         )
 | |
|         if PY3 and sys.version_info.minor > 1:  # python 3.2 or superior
 | |
|             output += array.array("i", offsets).tobytes()
 | |
|         else:
 | |
|             output += array.array("i", offsets).tostring()
 | |
|         output += ids
 | |
|         output += strs
 | |
|         return output
 | |
| 
 | |
|     def _encode(self, mixed):
 | |
|         """
 | |
|         Encodes the given ``mixed`` argument with the file encoding if and
 | |
|         only if it's an unicode string and returns the encoded string.
 | |
|         """
 | |
|         if isinstance(mixed, text_type):
 | |
|             mixed = mixed.encode(self.encoding)
 | |
|         return mixed
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # class POFile {{{
 | |
| class POFile(_BaseFile):
 | |
|     """
 | |
|     Po (or Pot) file reader/writer.
 | |
|     This class inherits the :class:`~polib._BaseFile` class and, by extension,
 | |
|     the python ``list`` type.
 | |
|     """
 | |
| 
 | |
|     def __unicode__(self):
 | |
|         """
 | |
|         Returns the unicode representation of the po file.
 | |
|         """
 | |
|         ret, headers = '', self.header.split('\n')
 | |
|         for header in headers:
 | |
|             if not len(header):
 | |
|                 ret += "#\n"
 | |
|             elif header[:1] in [',', ':']:
 | |
|                 ret += '#%s\n' % header
 | |
|             else:
 | |
|                 ret += '# %s\n' % header
 | |
| 
 | |
|         if not isinstance(ret, text_type):
 | |
|             ret = ret.decode(self.encoding)
 | |
| 
 | |
|         return ret + _BaseFile.__unicode__(self)
 | |
| 
 | |
|     def save_as_mofile(self, fpath):
 | |
|         """
 | |
|         Saves the binary representation of the file to given ``fpath``.
 | |
| 
 | |
|         Keyword argument:
 | |
| 
 | |
|         ``fpath``
 | |
|             string, full or relative path to the mo file.
 | |
|         """
 | |
|         _BaseFile.save(self, fpath, 'to_binary')
 | |
| 
 | |
|     def percent_translated(self):
 | |
|         """
 | |
|         Convenience method that returns the percentage of translated
 | |
|         messages.
 | |
|         """
 | |
|         total = len([e for e in self if not e.obsolete])
 | |
|         if total == 0:
 | |
|             return 100
 | |
|         translated = len(self.translated_entries())
 | |
|         return int(translated * 100 / float(total))
 | |
| 
 | |
|     def translated_entries(self):
 | |
|         """
 | |
|         Convenience method that returns the list of translated entries.
 | |
|         """
 | |
|         return [e for e in self if e.translated()]
 | |
| 
 | |
|     def untranslated_entries(self):
 | |
|         """
 | |
|         Convenience method that returns the list of untranslated entries.
 | |
|         """
 | |
|         return [e for e in self if not e.translated() and not e.obsolete
 | |
|                 and not e.fuzzy]
 | |
| 
 | |
|     def fuzzy_entries(self):
 | |
|         """
 | |
|         Convenience method that returns the list of fuzzy entries.
 | |
|         """
 | |
|         return [e for e in self if e.fuzzy and not e.obsolete]
 | |
| 
 | |
|     def obsolete_entries(self):
 | |
|         """
 | |
|         Convenience method that returns the list of obsolete entries.
 | |
|         """
 | |
|         return [e for e in self if e.obsolete]
 | |
| 
 | |
|     def merge(self, refpot):
 | |
|         """
 | |
|         Convenience method that merges the current pofile with the pot file
 | |
|         provided. It behaves exactly as the gettext msgmerge utility:
 | |
| 
 | |
|         * comments of this file will be preserved, but extracted comments and
 | |
|           occurrences will be discarded;
 | |
|         * any translations or comments in the file will be discarded, however,
 | |
|           dot comments and file positions will be preserved;
 | |
|         * the fuzzy flags are preserved.
 | |
| 
 | |
|         Keyword argument:
 | |
| 
 | |
|         ``refpot``
 | |
|             object POFile, the reference catalog.
 | |
|         """
 | |
|         # Store entries in dict/set for faster access
 | |
|         self_entries = {
 | |
|             entry.msgid_with_context: entry for entry in self
 | |
|         }
 | |
|         refpot_msgids = {entry.msgid_with_context for entry in refpot}
 | |
|         # Merge entries that are in the refpot
 | |
|         for entry in refpot:
 | |
|             e = self_entries.get(entry.msgid_with_context)
 | |
|             if e is None:
 | |
|                 e = POEntry()
 | |
|                 self.append(e)
 | |
|             e.merge(entry)
 | |
|         # ok, now we must "obsolete" entries that are not in the refpot anymore
 | |
|         for entry in self:
 | |
|             if entry.msgid_with_context not in refpot_msgids:
 | |
|                 entry.obsolete = True
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # class MOFile {{{
 | |
| class MOFile(_BaseFile):
 | |
|     """
 | |
|     Mo file reader/writer.
 | |
|     This class inherits the :class:`~polib._BaseFile` class and, by
 | |
|     extension, the python ``list`` type.
 | |
|     """
 | |
|     MAGIC = 0x950412de
 | |
|     MAGIC_SWAPPED = 0xde120495
 | |
| 
 | |
|     def __init__(self, *args, **kwargs):
 | |
|         """
 | |
|         Constructor, accepts all keywords arguments accepted by
 | |
|         :class:`~polib._BaseFile` class.
 | |
|         """
 | |
|         _BaseFile.__init__(self, *args, **kwargs)
 | |
|         self.magic_number = None
 | |
|         self.version = 0
 | |
| 
 | |
|     def save_as_pofile(self, fpath):
 | |
|         """
 | |
|         Saves the mofile as a pofile to ``fpath``.
 | |
| 
 | |
|         Keyword argument:
 | |
| 
 | |
|         ``fpath``
 | |
|             string, full or relative path to the file.
 | |
|         """
 | |
|         _BaseFile.save(self, fpath)
 | |
| 
 | |
|     def save(self, fpath=None):
 | |
|         """
 | |
|         Saves the mofile to ``fpath``.
 | |
| 
 | |
|         Keyword argument:
 | |
| 
 | |
|         ``fpath``
 | |
|             string, full or relative path to the file.
 | |
|         """
 | |
|         _BaseFile.save(self, fpath, 'to_binary')
 | |
| 
 | |
|     def percent_translated(self):
 | |
|         """
 | |
|         Convenience method to keep the same interface with POFile instances.
 | |
|         """
 | |
|         return 100
 | |
| 
 | |
|     def translated_entries(self):
 | |
|         """
 | |
|         Convenience method to keep the same interface with POFile instances.
 | |
|         """
 | |
|         return self
 | |
| 
 | |
|     def untranslated_entries(self):
 | |
|         """
 | |
|         Convenience method to keep the same interface with POFile instances.
 | |
|         """
 | |
|         return []
 | |
| 
 | |
|     def fuzzy_entries(self):
 | |
|         """
 | |
|         Convenience method to keep the same interface with POFile instances.
 | |
|         """
 | |
|         return []
 | |
| 
 | |
|     def obsolete_entries(self):
 | |
|         """
 | |
|         Convenience method to keep the same interface with POFile instances.
 | |
|         """
 | |
|         return []
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # class _BaseEntry {{{
 | |
| class _BaseEntry:
 | |
|     """
 | |
|     Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
 | |
|     This class should **not** be instantiated directly.
 | |
|     """
 | |
| 
 | |
|     def __init__(self, *args, **kwargs):
 | |
|         """
 | |
|         Constructor, accepts the following keyword arguments:
 | |
| 
 | |
|         ``msgid``
 | |
|             string, the entry msgid.
 | |
| 
 | |
|         ``msgstr``
 | |
|             string, the entry msgstr.
 | |
| 
 | |
|         ``msgid_plural``
 | |
|             string, the entry msgid_plural.
 | |
| 
 | |
|         ``msgstr_plural``
 | |
|             dict, the entry msgstr_plural lines.
 | |
| 
 | |
|         ``msgctxt``
 | |
|             string, the entry context (msgctxt).
 | |
| 
 | |
|         ``obsolete``
 | |
|             bool, whether the entry is "obsolete" or not.
 | |
| 
 | |
|         ``encoding``
 | |
|             string, the encoding to use, defaults to ``default_encoding``
 | |
|             global variable (optional).
 | |
|         """
 | |
|         self.msgid = kwargs.get('msgid', '')
 | |
|         self.msgstr = kwargs.get('msgstr', '')
 | |
|         self.msgid_plural = kwargs.get('msgid_plural', '')
 | |
|         self.msgstr_plural = kwargs.get('msgstr_plural', {})
 | |
|         self.msgctxt = kwargs.get('msgctxt', None)
 | |
|         self.obsolete = kwargs.get('obsolete', False)
 | |
|         self.encoding = kwargs.get('encoding', default_encoding)
 | |
| 
 | |
|     def __unicode__(self, wrapwidth=78):
 | |
|         """
 | |
|         Returns the unicode representation of the entry.
 | |
|         """
 | |
|         if self.obsolete:
 | |
|             delflag = '#~ '
 | |
|         else:
 | |
|             delflag = ''
 | |
|         ret = []
 | |
|         # write the msgctxt if any
 | |
|         if self.msgctxt is not None:
 | |
|             ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
 | |
|                                    wrapwidth)
 | |
|         # write the msgid
 | |
|         ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
 | |
|         # write the msgid_plural if any
 | |
|         if self.msgid_plural:
 | |
|             ret += self._str_field("msgid_plural", delflag, "",
 | |
|                                    self.msgid_plural, wrapwidth)
 | |
|         if self.msgstr_plural:
 | |
|             # write the msgstr_plural if any
 | |
|             msgstrs = self.msgstr_plural
 | |
|             keys = list(msgstrs)
 | |
|             keys.sort()
 | |
|             for index in keys:
 | |
|                 msgstr = msgstrs[index]
 | |
|                 plural_index = '[%s]' % index
 | |
|                 ret += self._str_field("msgstr", delflag, plural_index, msgstr,
 | |
|                                        wrapwidth)
 | |
|         else:
 | |
|             # otherwise write the msgstr
 | |
|             ret += self._str_field("msgstr", delflag, "", self.msgstr,
 | |
|                                    wrapwidth)
 | |
|         ret.append('')
 | |
|         ret = u('\n').join(ret)
 | |
|         return ret
 | |
| 
 | |
|     if PY3:
 | |
|         def __str__(self):
 | |
|             return self.__unicode__()
 | |
|     else:
 | |
|         def __str__(self):
 | |
|             """
 | |
|             Returns the string representation of the entry.
 | |
|             """
 | |
|             return unicode(self).encode(self.encoding)
 | |
| 
 | |
|     def __eq__(self, other):
 | |
|         return str(self) == str(other)
 | |
| 
 | |
|     def _str_field(self, fieldname, delflag, plural_index, field,
 | |
|                    wrapwidth=78):
 | |
|         lines = field.splitlines(True)
 | |
|         if len(lines) > 1:
 | |
|             lines = [''] + lines  # start with initial empty line
 | |
|         else:
 | |
|             escaped_field = escape(field)
 | |
|             specialchars_count = 0
 | |
|             for c in ['\\', '\n', '\r', '\t', '\v', '\b', '\f', '"']:
 | |
|                 specialchars_count += field.count(c)
 | |
|             # comparison must take into account fieldname length + one space
 | |
|             # + 2 quotes (eg. msgid "<string>")
 | |
|             flength = len(fieldname) + 3
 | |
|             if plural_index:
 | |
|                 flength += len(plural_index)
 | |
|             real_wrapwidth = wrapwidth - flength + specialchars_count
 | |
|             if wrapwidth > 0 and len(field) > real_wrapwidth:
 | |
|                 # Wrap the line but take field name into account
 | |
|                 lines = [''] + [unescape(item) for item in textwrap.wrap(
 | |
|                     escaped_field,
 | |
|                     wrapwidth - 2,  # 2 for quotes ""
 | |
|                     drop_whitespace=False,
 | |
|                     break_long_words=False
 | |
|                 )]
 | |
|             else:
 | |
|                 lines = [field]
 | |
|         if fieldname.startswith('previous_'):
 | |
|             # quick and dirty trick to get the real field name
 | |
|             fieldname = fieldname[9:]
 | |
| 
 | |
|         ret = ['{}{}{} "{}"'.format(delflag, fieldname, plural_index,
 | |
|                                 escape(lines.pop(0)))]
 | |
|         for line in lines:
 | |
|             ret.append(f'{delflag}"{escape(line)}"')
 | |
|         return ret
 | |
| 
 | |
|     @property
 | |
|     def msgid_with_context(self):
 | |
|         if self.msgctxt:
 | |
|             return '{}{}{}'.format(self.msgctxt, "\x04", self.msgid)
 | |
|         return self.msgid
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # class POEntry {{{
 | |
| class POEntry(_BaseEntry):
 | |
|     """
 | |
|     Represents a po file entry.
 | |
|     """
 | |
| 
 | |
|     def __init__(self, *args, **kwargs):
 | |
|         """
 | |
|         Constructor, accepts the following keyword arguments:
 | |
| 
 | |
|         ``comment``
 | |
|             string, the entry comment.
 | |
| 
 | |
|         ``tcomment``
 | |
|             string, the entry translator comment.
 | |
| 
 | |
|         ``occurrences``
 | |
|             list, the entry occurrences.
 | |
| 
 | |
|         ``flags``
 | |
|             list, the entry flags.
 | |
| 
 | |
|         ``previous_msgctxt``
 | |
|             string, the entry previous context.
 | |
| 
 | |
|         ``previous_msgid``
 | |
|             string, the entry previous msgid.
 | |
| 
 | |
|         ``previous_msgid_plural``
 | |
|             string, the entry previous msgid_plural.
 | |
| 
 | |
|         ``linenum``
 | |
|             integer, the line number of the entry
 | |
|         """
 | |
|         _BaseEntry.__init__(self, *args, **kwargs)
 | |
|         self.comment = kwargs.get('comment', '')
 | |
|         self.tcomment = kwargs.get('tcomment', '')
 | |
|         self.occurrences = kwargs.get('occurrences', [])
 | |
|         self.flags = kwargs.get('flags', [])
 | |
|         self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
 | |
|         self.previous_msgid = kwargs.get('previous_msgid', None)
 | |
|         self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
 | |
|         self.linenum = kwargs.get('linenum', None)
 | |
| 
 | |
|     def __unicode__(self, wrapwidth=78):
 | |
|         """
 | |
|         Returns the unicode representation of the entry.
 | |
|         """
 | |
|         ret = []
 | |
|         # comments first, if any (with text wrapping as xgettext does)
 | |
|         if self.obsolete:
 | |
|             comments = [('tcomment', '# ')]
 | |
|         else:
 | |
|             comments = [('tcomment', '# '), ('comment', '#. ')]
 | |
|         for c in comments:
 | |
|             val = getattr(self, c[0])
 | |
|             if val:
 | |
|                 for comment in val.split('\n'):
 | |
|                     if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
 | |
|                         ret += textwrap.wrap(
 | |
|                             comment,
 | |
|                             wrapwidth,
 | |
|                             initial_indent=c[1],
 | |
|                             subsequent_indent=c[1],
 | |
|                             break_long_words=False
 | |
|                         )
 | |
|                     else:
 | |
|                         ret.append(f'{c[1]}{comment}')
 | |
| 
 | |
|         # occurrences (with text wrapping as xgettext does)
 | |
|         if not self.obsolete and self.occurrences:
 | |
|             filelist = []
 | |
|             for fpath, lineno in self.occurrences:
 | |
|                 if lineno:
 | |
|                     filelist.append(f'{fpath}:{lineno}')
 | |
|                 else:
 | |
|                     filelist.append(fpath)
 | |
|             filestr = ' '.join(filelist)
 | |
|             if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
 | |
|                 # textwrap split words that contain hyphen, this is not
 | |
|                 # what we want for filenames, so the dirty hack is to
 | |
|                 # temporally replace hyphens with a char that a file cannot
 | |
|                 # contain, like "*"
 | |
|                 ret += [line.replace('*', '-') for line in textwrap.wrap(
 | |
|                     filestr.replace('-', '*'),
 | |
|                     wrapwidth,
 | |
|                     initial_indent='#: ',
 | |
|                     subsequent_indent='#: ',
 | |
|                     break_long_words=False
 | |
|                 )]
 | |
|             else:
 | |
|                 ret.append('#: ' + filestr)
 | |
| 
 | |
|         # flags (TODO: wrapping ?)
 | |
|         if self.flags:
 | |
|             ret.append('#, %s' % ', '.join(self.flags))
 | |
| 
 | |
|         # previous context and previous msgid/msgid_plural
 | |
|         fields = ['previous_msgctxt', 'previous_msgid',
 | |
|                   'previous_msgid_plural']
 | |
|         if self.obsolete:
 | |
|             prefix = "#~| "
 | |
|         else:
 | |
|             prefix = "#| "
 | |
|         for f in fields:
 | |
|             val = getattr(self, f)
 | |
|             if val is not None:
 | |
|                 ret += self._str_field(f, prefix, "", val, wrapwidth)
 | |
| 
 | |
|         ret.append(_BaseEntry.__unicode__(self, wrapwidth))
 | |
|         ret = u('\n').join(ret)
 | |
|         return ret
 | |
| 
 | |
|     def __cmp__(self, other):
 | |
|         """
 | |
|         Called by comparison operations if rich comparison is not defined.
 | |
|         """
 | |
|         # First: Obsolete test
 | |
|         if self.obsolete != other.obsolete:
 | |
|             if self.obsolete:
 | |
|                 return -1
 | |
|             else:
 | |
|                 return 1
 | |
|         # Work on a copy to protect original
 | |
|         occ1 = sorted(self.occurrences[:])
 | |
|         occ2 = sorted(other.occurrences[:])
 | |
|         if occ1 > occ2:
 | |
|             return 1
 | |
|         if occ1 < occ2:
 | |
|             return -1
 | |
|         # Compare context
 | |
|         msgctxt = self.msgctxt or '0'
 | |
|         othermsgctxt = other.msgctxt or '0'
 | |
|         if msgctxt > othermsgctxt:
 | |
|             return 1
 | |
|         elif msgctxt < othermsgctxt:
 | |
|             return -1
 | |
|         # Compare msgid_plural
 | |
|         msgid_plural = self.msgid_plural or '0'
 | |
|         othermsgid_plural = other.msgid_plural or '0'
 | |
|         if msgid_plural > othermsgid_plural:
 | |
|             return 1
 | |
|         elif msgid_plural < othermsgid_plural:
 | |
|             return -1
 | |
|         # Compare msgstr_plural
 | |
|         if self.msgstr_plural and isinstance(self.msgstr_plural, dict):
 | |
|             msgstr_plural = list(self.msgstr_plural.values())
 | |
|         else:
 | |
|             msgstr_plural = []
 | |
|         if other.msgstr_plural and isinstance(other.msgstr_plural, dict):
 | |
|             othermsgstr_plural = list(other.msgstr_plural.values())
 | |
|         else:
 | |
|             othermsgstr_plural = []
 | |
|         if msgstr_plural > othermsgstr_plural:
 | |
|             return 1
 | |
|         elif msgstr_plural < othermsgstr_plural:
 | |
|             return -1
 | |
|         # Compare msgid
 | |
|         if self.msgid > other.msgid:
 | |
|             return 1
 | |
|         elif self.msgid < other.msgid:
 | |
|             return -1
 | |
|         # Compare msgstr
 | |
|         if self.msgstr > other.msgstr:
 | |
|             return 1
 | |
|         elif self.msgstr < other.msgstr:
 | |
|             return -1
 | |
|         return 0
 | |
| 
 | |
|     def __gt__(self, other):
 | |
|         return self.__cmp__(other) > 0
 | |
| 
 | |
|     def __lt__(self, other):
 | |
|         return self.__cmp__(other) < 0
 | |
| 
 | |
|     def __ge__(self, other):
 | |
|         return self.__cmp__(other) >= 0
 | |
| 
 | |
|     def __le__(self, other):
 | |
|         return self.__cmp__(other) <= 0
 | |
| 
 | |
|     def __eq__(self, other):
 | |
|         return self.__cmp__(other) == 0
 | |
| 
 | |
|     def __ne__(self, other):
 | |
|         return self.__cmp__(other) != 0
 | |
| 
 | |
|     def translated(self):
 | |
|         """
 | |
|         Returns ``True`` if the entry has been translated or ``False``
 | |
|         otherwise.
 | |
|         """
 | |
|         if self.obsolete or self.fuzzy:
 | |
|             return False
 | |
|         if self.msgstr != '':
 | |
|             return True
 | |
|         if self.msgstr_plural:
 | |
|             for pos in self.msgstr_plural:
 | |
|                 if self.msgstr_plural[pos] == '':
 | |
|                     return False
 | |
|             return True
 | |
|         return False
 | |
| 
 | |
|     def merge(self, other):
 | |
|         """
 | |
|         Merge the current entry with the given pot entry.
 | |
|         """
 | |
|         self.msgid = other.msgid
 | |
|         self.msgctxt = other.msgctxt
 | |
|         self.occurrences = other.occurrences
 | |
|         self.comment = other.comment
 | |
|         fuzzy = self.fuzzy
 | |
|         self.flags = other.flags[:]  # clone flags
 | |
|         if fuzzy:
 | |
|             self.flags.append('fuzzy')
 | |
|         self.msgid_plural = other.msgid_plural
 | |
|         self.obsolete = other.obsolete
 | |
|         self.previous_msgctxt = other.previous_msgctxt
 | |
|         self.previous_msgid = other.previous_msgid
 | |
|         self.previous_msgid_plural = other.previous_msgid_plural
 | |
|         if other.msgstr_plural:
 | |
|             for pos in other.msgstr_plural:
 | |
|                 try:
 | |
|                     # keep existing translation at pos if any
 | |
|                     self.msgstr_plural[pos]
 | |
|                 except KeyError:
 | |
|                     self.msgstr_plural[pos] = ''
 | |
| 
 | |
|     @property
 | |
|     def fuzzy(self):
 | |
|         return 'fuzzy' in self.flags
 | |
| 
 | |
|     @fuzzy.setter
 | |
|     def fuzzy(self, value):
 | |
|         if value and not self.fuzzy:
 | |
|             self.flags.insert(0, 'fuzzy')
 | |
|         elif not value and self.fuzzy:
 | |
|             self.flags.remove('fuzzy')
 | |
| 
 | |
|     def __hash__(self):
 | |
|         return hash((self.msgid, self.msgstr))
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # class MOEntry {{{
 | |
| class MOEntry(_BaseEntry):
 | |
|     """
 | |
|     Represents a mo file entry.
 | |
|     """
 | |
|     def __init__(self, *args, **kwargs):
 | |
|         """
 | |
|         Constructor, accepts the following keyword arguments,
 | |
|         for consistency with :class:`~polib.POEntry`:
 | |
| 
 | |
|         ``comment``
 | |
|         ``tcomment``
 | |
|         ``occurrences``
 | |
|         ``flags``
 | |
|         ``previous_msgctxt``
 | |
|         ``previous_msgid``
 | |
|         ``previous_msgid_plural``
 | |
| 
 | |
|         Note: even though these keyword arguments are accepted,
 | |
|         they hold no real meaning in the context of MO files
 | |
|         and are simply ignored.
 | |
|         """
 | |
|         _BaseEntry.__init__(self, *args, **kwargs)
 | |
|         self.comment = ''
 | |
|         self.tcomment = ''
 | |
|         self.occurrences = []
 | |
|         self.flags = []
 | |
|         self.previous_msgctxt = None
 | |
|         self.previous_msgid = None
 | |
|         self.previous_msgid_plural = None
 | |
| 
 | |
|     def __hash__(self):
 | |
|         return hash((self.msgid, self.msgstr))
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # class _POFileParser {{{
 | |
| class _POFileParser:
 | |
|     """
 | |
|     A finite state machine to efficiently and correctly parse po
 | |
|     file format.
 | |
|     """
 | |
| 
 | |
|     def __init__(self, pofile, *args, **kwargs):
 | |
|         """
 | |
|         Constructor.
 | |
| 
 | |
|         Keyword arguments:
 | |
| 
 | |
|         ``pofile``
 | |
|             string, path to the po file or its content
 | |
| 
 | |
|         ``encoding``
 | |
|             string, the encoding to use, defaults to ``default_encoding``
 | |
|             global variable (optional).
 | |
| 
 | |
|         ``check_for_duplicates``
 | |
|             whether to check for duplicate entries when adding entries to the
 | |
|             file (optional, default: ``False``).
 | |
|         """
 | |
|         enc = kwargs.get('encoding', default_encoding)
 | |
|         if _is_file(pofile):
 | |
|             try:
 | |
|                 self.fhandle = open(pofile, encoding=enc)
 | |
|             except LookupError:
 | |
|                 enc = default_encoding
 | |
|                 self.fhandle = open(pofile, encoding=enc)
 | |
|         else:
 | |
|             self.fhandle = pofile.splitlines()
 | |
| 
 | |
|         klass = kwargs.get('klass')
 | |
|         if klass is None:
 | |
|             klass = POFile
 | |
|         self.instance = klass(
 | |
|             pofile=pofile,
 | |
|             encoding=enc,
 | |
|             check_for_duplicates=kwargs.get('check_for_duplicates', False)
 | |
|         )
 | |
|         self.transitions = {}
 | |
|         self.current_line = 0
 | |
|         self.current_entry = POEntry(linenum=self.current_line)
 | |
|         self.current_state = 'st'
 | |
|         self.current_token = None
 | |
|         # two memo flags used in handlers
 | |
|         self.msgstr_index = 0
 | |
|         self.entry_obsolete = 0
 | |
|         # Configure the state machine, by adding transitions.
 | |
|         # Signification of symbols:
 | |
|         #     * ST: Beginning of the file (start)
 | |
|         #     * HE: Header
 | |
|         #     * TC: a translation comment
 | |
|         #     * GC: a generated comment
 | |
|         #     * OC: a file/line occurrence
 | |
|         #     * FL: a flags line
 | |
|         #     * CT: a message context
 | |
|         #     * PC: a previous msgctxt
 | |
|         #     * PM: a previous msgid
 | |
|         #     * PP: a previous msgid_plural
 | |
|         #     * MI: a msgid
 | |
|         #     * MP: a msgid plural
 | |
|         #     * MS: a msgstr
 | |
|         #     * MX: a msgstr plural
 | |
|         #     * MC: a msgid or msgstr continuation line
 | |
|         all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc',
 | |
|                'ms', 'mp', 'mx', 'mi']
 | |
| 
 | |
|         self.add('tc', ['st', 'he'],                                     'he')
 | |
|         self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
 | |
|                         'mp', 'mx', 'mi'],                               'tc')
 | |
|         self.add('gc', all,                                              'gc')
 | |
|         self.add('oc', all,                                              'oc')
 | |
|         self.add('fl', all,                                              'fl')
 | |
|         self.add('pc', all,                                              'pc')
 | |
|         self.add('pm', all,                                              'pm')
 | |
|         self.add('pp', all,                                              'pp')
 | |
|         self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
 | |
|                         'pp', 'ms', 'mx'],                               'ct')
 | |
|         self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
 | |
|                  'pm', 'pp', 'ms', 'mx'],                                'mi')
 | |
|         self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'],             'mp')
 | |
|         self.add('ms', ['mi', 'mp', 'tc'],                               'ms')
 | |
|         self.add('mx', ['mi', 'mx', 'mp', 'tc'],                         'mx')
 | |
|         self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
 | |
| 
 | |
|     def parse(self):
 | |
|         """
 | |
|         Run the state machine, parse the file line by line and call process()
 | |
|         with the current matched symbol.
 | |
|         """
 | |
|         try:
 | |
|             keywords = {
 | |
|                 'msgctxt': 'ct',
 | |
|                 'msgid': 'mi',
 | |
|                 'msgstr': 'ms',
 | |
|                 'msgid_plural': 'mp',
 | |
|             }
 | |
|             prev_keywords = {
 | |
|                 'msgid_plural': 'pp',
 | |
|                 'msgid': 'pm',
 | |
|                 'msgctxt': 'pc',
 | |
|             }
 | |
|             tokens = []
 | |
|             fpath = '%s ' % self.instance.fpath if self.instance.fpath else ''
 | |
|             for line in self.fhandle:
 | |
|                 self.current_line += 1
 | |
|                 if self.current_line == 1:
 | |
|                     BOM = codecs.BOM_UTF8.decode('utf-8')
 | |
|                     if line.startswith(BOM):
 | |
|                         line = line[len(BOM):]
 | |
|                 line = line.strip()
 | |
|                 if line == '':
 | |
|                     continue
 | |
| 
 | |
|                 tokens = line.split(None, 2)
 | |
|                 nb_tokens = len(tokens)
 | |
| 
 | |
|                 if tokens[0] == '#~|':
 | |
|                     continue
 | |
| 
 | |
|                 if tokens[0] == '#~' and nb_tokens > 1:
 | |
|                     line = line[3:].strip()
 | |
|                     tokens = tokens[1:]
 | |
|                     nb_tokens -= 1
 | |
|                     self.entry_obsolete = 1
 | |
|                 else:
 | |
|                     self.entry_obsolete = 0
 | |
| 
 | |
|                 # Take care of keywords like
 | |
|                 # msgid, msgid_plural, msgctxt & msgstr.
 | |
|                 if tokens[0] in keywords and nb_tokens > 1:
 | |
|                     line = line[len(tokens[0]):].lstrip()
 | |
|                     if re.search(r'([^\\]|^)"', line[1:-1]):
 | |
|                         raise OSError('Syntax error in po file %s(line %s): '
 | |
|                                       'unescaped double quote found' %
 | |
|                                       (fpath, self.current_line))
 | |
|                     self.current_token = line
 | |
|                     self.process(keywords[tokens[0]])
 | |
|                     continue
 | |
| 
 | |
|                 self.current_token = line
 | |
| 
 | |
|                 if tokens[0] == '#:':
 | |
|                     if nb_tokens <= 1:
 | |
|                         continue
 | |
|                     # we are on a occurrences line
 | |
|                     self.process('oc')
 | |
| 
 | |
|                 elif line[:1] == '"':
 | |
|                     # we are on a continuation line
 | |
|                     if re.search(r'([^\\]|^)"', line[1:-1]):
 | |
|                         raise OSError('Syntax error in po file %s(line %s): '
 | |
|                                       'unescaped double quote found' %
 | |
|                                       (fpath, self.current_line))
 | |
|                     self.process('mc')
 | |
| 
 | |
|                 elif line[:7] == 'msgstr[':
 | |
|                     # we are on a msgstr plural
 | |
|                     self.process('mx')
 | |
| 
 | |
|                 elif tokens[0] == '#,':
 | |
|                     if nb_tokens <= 1:
 | |
|                         continue
 | |
|                     # we are on a flags line
 | |
|                     self.process('fl')
 | |
| 
 | |
|                 elif tokens[0] == '#' or tokens[0].startswith('##'):
 | |
|                     if line == '#':
 | |
|                         line += ' '
 | |
|                     # we are on a translator comment line
 | |
|                     self.process('tc')
 | |
| 
 | |
|                 elif tokens[0] == '#.':
 | |
|                     if nb_tokens <= 1:
 | |
|                         continue
 | |
|                     # we are on a generated comment line
 | |
|                     self.process('gc')
 | |
| 
 | |
|                 elif tokens[0] == '#|':
 | |
|                     if nb_tokens <= 1:
 | |
|                         raise OSError('Syntax error in po file %s(line %s)' %
 | |
|                                       (fpath, self.current_line))
 | |
| 
 | |
|                     # Remove the marker and any whitespace right after that.
 | |
|                     line = line[2:].lstrip()
 | |
|                     self.current_token = line
 | |
| 
 | |
|                     if tokens[1].startswith('"'):
 | |
|                         # Continuation of previous metadata.
 | |
|                         self.process('mc')
 | |
|                         continue
 | |
| 
 | |
|                     if nb_tokens == 2:
 | |
|                         # Invalid continuation line.
 | |
|                         raise OSError('Syntax error in po file %s(line %s): '
 | |
|                                       'invalid continuation line' %
 | |
|                                       (fpath, self.current_line))
 | |
| 
 | |
|                     # we are on a "previous translation" comment line,
 | |
|                     if tokens[1] not in prev_keywords:
 | |
|                         # Unknown keyword in previous translation comment.
 | |
|                         raise OSError('Syntax error in po file %s(line %s): '
 | |
|                                       'unknown keyword %s' %
 | |
|                                       (fpath, self.current_line,
 | |
|                                        tokens[1]))
 | |
| 
 | |
|                     # Remove the keyword and any whitespace
 | |
|                     # between it and the starting quote.
 | |
|                     line = line[len(tokens[1]):].lstrip()
 | |
|                     self.current_token = line
 | |
|                     self.process(prev_keywords[tokens[1]])
 | |
| 
 | |
|                 else:
 | |
|                     raise OSError('Syntax error in po file %s(line %s)' %
 | |
|                                   (fpath, self.current_line))
 | |
| 
 | |
|             if self.current_entry and len(tokens) > 0 and \
 | |
|                not tokens[0].startswith('#'):
 | |
|                 # since entries are added when another entry is found, we must
 | |
|                 # add the last entry here (only if there are lines). Trailing
 | |
|                 # comments are ignored
 | |
|                 self.instance.append(self.current_entry)
 | |
| 
 | |
|             # before returning the instance, check if there's metadata and if
 | |
|             # so extract it in a dict
 | |
|             metadataentry = self.instance.find('')
 | |
|             if metadataentry:  # metadata found
 | |
|                 # remove the entry
 | |
|                 self.instance.remove(metadataentry)
 | |
|                 self.instance.metadata_is_fuzzy = metadataentry.flags
 | |
|                 key = None
 | |
|                 for msg in metadataentry.msgstr.splitlines():
 | |
|                     try:
 | |
|                         key, val = msg.split(':', 1)
 | |
|                         self.instance.metadata[key] = val.strip()
 | |
|                     except (ValueError, KeyError):
 | |
|                         if key is not None:
 | |
|                             self.instance.metadata[key] += '\n' + msg.strip()
 | |
|         finally:
 | |
|             # close opened file
 | |
|             if not isinstance(self.fhandle, list):  # must be file
 | |
|                 self.fhandle.close()
 | |
|         return self.instance
 | |
| 
 | |
|     def add(self, symbol, states, next_state):
 | |
|         """
 | |
|         Add a transition to the state machine.
 | |
| 
 | |
|         Keywords arguments:
 | |
| 
 | |
|         ``symbol``
 | |
|             string, the matched token (two chars symbol).
 | |
| 
 | |
|         ``states``
 | |
|             list, a list of states (two chars symbols).
 | |
| 
 | |
|         ``next_state``
 | |
|             the next state the fsm will have after the action.
 | |
|         """
 | |
|         for state in states:
 | |
|             action = getattr(self, 'handle_%s' % next_state)
 | |
|             self.transitions[(symbol, state)] = (action, next_state)
 | |
| 
 | |
|     def process(self, symbol):
 | |
|         """
 | |
|         Process the transition corresponding to the current state and the
 | |
|         symbol provided.
 | |
| 
 | |
|         Keywords arguments:
 | |
| 
 | |
|         ``symbol``
 | |
|             string, the matched token (two chars symbol).
 | |
| 
 | |
|         ``linenum``
 | |
|             integer, the current line number of the parsed file.
 | |
|         """
 | |
|         try:
 | |
|             (action, state) = self.transitions[(symbol, self.current_state)]
 | |
|             if action():
 | |
|                 self.current_state = state
 | |
|         except Exception:
 | |
|             fpath = '%s ' % self.instance.fpath if self.instance.fpath else ''
 | |
|             if hasattr(self.fhandle, 'close'):
 | |
|                 self.fhandle.close()
 | |
|             raise OSError('Syntax error in po file %s(line %s)' %
 | |
|                           (fpath, self.current_line))
 | |
| 
 | |
|     # state handlers
 | |
| 
 | |
|     def handle_he(self):
 | |
|         """Handle a header comment."""
 | |
|         if self.instance.header != '':
 | |
|             self.instance.header += '\n'
 | |
|         self.instance.header += self.current_token[2:]
 | |
|         return 1
 | |
| 
 | |
|     def handle_tc(self):
 | |
|         """Handle a translator comment."""
 | |
|         if self.current_state in ['mc', 'ms', 'mx']:
 | |
|             self.instance.append(self.current_entry)
 | |
|             self.current_entry = POEntry(linenum=self.current_line)
 | |
|         if self.current_entry.tcomment != '':
 | |
|             self.current_entry.tcomment += '\n'
 | |
|         tcomment = self.current_token.lstrip('#')
 | |
|         if tcomment.startswith(' '):
 | |
|             tcomment = tcomment[1:]
 | |
|         self.current_entry.tcomment += tcomment
 | |
|         return True
 | |
| 
 | |
|     def handle_gc(self):
 | |
|         """Handle a generated comment."""
 | |
|         if self.current_state in ['mc', 'ms', 'mx']:
 | |
|             self.instance.append(self.current_entry)
 | |
|             self.current_entry = POEntry(linenum=self.current_line)
 | |
|         if self.current_entry.comment != '':
 | |
|             self.current_entry.comment += '\n'
 | |
|         self.current_entry.comment += self.current_token[3:]
 | |
|         return True
 | |
| 
 | |
|     def handle_oc(self):
 | |
|         """Handle a file:num occurrence."""
 | |
|         if self.current_state in ['mc', 'ms', 'mx']:
 | |
|             self.instance.append(self.current_entry)
 | |
|             self.current_entry = POEntry(linenum=self.current_line)
 | |
|         occurrences = self.current_token[3:].split()
 | |
|         for occurrence in occurrences:
 | |
|             if occurrence != '':
 | |
|                 try:
 | |
|                     fil, line = occurrence.rsplit(':', 1)
 | |
|                     if not line.isdigit():
 | |
|                         fil = occurrence
 | |
|                         line = ''
 | |
|                     self.current_entry.occurrences.append((fil, line))
 | |
|                 except (ValueError, AttributeError):
 | |
|                     self.current_entry.occurrences.append((occurrence, ''))
 | |
|         return True
 | |
| 
 | |
|     def handle_fl(self):
 | |
|         """Handle a flags line."""
 | |
|         if self.current_state in ['mc', 'ms', 'mx']:
 | |
|             self.instance.append(self.current_entry)
 | |
|             self.current_entry = POEntry(linenum=self.current_line)
 | |
|         self.current_entry.flags += [c.strip() for c in
 | |
|                                      self.current_token[3:].split(',')]
 | |
|         return True
 | |
| 
 | |
|     def handle_pp(self):
 | |
|         """Handle a previous msgid_plural line."""
 | |
|         if self.current_state in ['mc', 'ms', 'mx']:
 | |
|             self.instance.append(self.current_entry)
 | |
|             self.current_entry = POEntry(linenum=self.current_line)
 | |
|         self.current_entry.previous_msgid_plural = \
 | |
|             unescape(self.current_token[1:-1])
 | |
|         return True
 | |
| 
 | |
|     def handle_pm(self):
 | |
|         """Handle a previous msgid line."""
 | |
|         if self.current_state in ['mc', 'ms', 'mx']:
 | |
|             self.instance.append(self.current_entry)
 | |
|             self.current_entry = POEntry(linenum=self.current_line)
 | |
|         self.current_entry.previous_msgid = \
 | |
|             unescape(self.current_token[1:-1])
 | |
|         return True
 | |
| 
 | |
|     def handle_pc(self):
 | |
|         """Handle a previous msgctxt line."""
 | |
|         if self.current_state in ['mc', 'ms', 'mx']:
 | |
|             self.instance.append(self.current_entry)
 | |
|             self.current_entry = POEntry(linenum=self.current_line)
 | |
|         self.current_entry.previous_msgctxt = \
 | |
|             unescape(self.current_token[1:-1])
 | |
|         return True
 | |
| 
 | |
|     def handle_ct(self):
 | |
|         """Handle a msgctxt."""
 | |
|         if self.current_state in ['mc', 'ms', 'mx']:
 | |
|             self.instance.append(self.current_entry)
 | |
|             self.current_entry = POEntry(linenum=self.current_line)
 | |
|         self.current_entry.msgctxt = unescape(self.current_token[1:-1])
 | |
|         return True
 | |
| 
 | |
|     def handle_mi(self):
 | |
|         """Handle a msgid."""
 | |
|         if self.current_state in ['mc', 'ms', 'mx']:
 | |
|             self.instance.append(self.current_entry)
 | |
|             self.current_entry = POEntry(linenum=self.current_line)
 | |
|         self.current_entry.obsolete = self.entry_obsolete
 | |
|         self.current_entry.msgid = unescape(self.current_token[1:-1])
 | |
|         return True
 | |
| 
 | |
|     def handle_mp(self):
 | |
|         """Handle a msgid plural."""
 | |
|         self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
 | |
|         return True
 | |
| 
 | |
|     def handle_ms(self):
 | |
|         """Handle a msgstr."""
 | |
|         self.current_entry.msgstr = unescape(self.current_token[1:-1])
 | |
|         return True
 | |
| 
 | |
|     def handle_mx(self):
 | |
|         """Handle a msgstr plural."""
 | |
|         index = self.current_token[7]
 | |
|         value = self.current_token[self.current_token.find('"') + 1:-1]
 | |
|         self.current_entry.msgstr_plural[int(index)] = unescape(value)
 | |
|         self.msgstr_index = int(index)
 | |
|         return True
 | |
| 
 | |
|     def handle_mc(self):
 | |
|         """Handle a msgid or msgstr continuation line."""
 | |
|         token = unescape(self.current_token[1:-1])
 | |
|         if self.current_state == 'ct':
 | |
|             self.current_entry.msgctxt += token
 | |
|         elif self.current_state == 'mi':
 | |
|             self.current_entry.msgid += token
 | |
|         elif self.current_state == 'mp':
 | |
|             self.current_entry.msgid_plural += token
 | |
|         elif self.current_state == 'ms':
 | |
|             self.current_entry.msgstr += token
 | |
|         elif self.current_state == 'mx':
 | |
|             self.current_entry.msgstr_plural[self.msgstr_index] += token
 | |
|         elif self.current_state == 'pp':
 | |
|             self.current_entry.previous_msgid_plural += token
 | |
|         elif self.current_state == 'pm':
 | |
|             self.current_entry.previous_msgid += token
 | |
|         elif self.current_state == 'pc':
 | |
|             self.current_entry.previous_msgctxt += token
 | |
|         # don't change the current state
 | |
|         return False
 | |
| # }}}
 | |
| 
 | |
| 
 | |
| # class _MOFileParser {{{
 | |
| class _MOFileParser:
 | |
|     """
 | |
|     A class to parse binary mo files.
 | |
|     """
 | |
| 
 | |
|     def __init__(self, mofile, *args, **kwargs):
 | |
|         """
 | |
|         Constructor.
 | |
| 
 | |
|         Keyword arguments:
 | |
| 
 | |
|         ``mofile``
 | |
|             string, path to the mo file or its content
 | |
| 
 | |
|         ``encoding``
 | |
|             string, the encoding to use, defaults to ``default_encoding``
 | |
|             global variable (optional).
 | |
| 
 | |
|         ``check_for_duplicates``
 | |
|             whether to check for duplicate entries when adding entries to the
 | |
|             file (optional, default: ``False``).
 | |
|         """
 | |
|         if _is_file(mofile):
 | |
|             self.fhandle = open(mofile, 'rb')
 | |
|         else:
 | |
|             self.fhandle = io.BytesIO(mofile)
 | |
| 
 | |
|         klass = kwargs.get('klass')
 | |
|         if klass is None:
 | |
|             klass = MOFile
 | |
|         self.instance = klass(
 | |
|             fpath=mofile,
 | |
|             encoding=kwargs.get('encoding', default_encoding),
 | |
|             check_for_duplicates=kwargs.get('check_for_duplicates', False)
 | |
|         )
 | |
| 
 | |
|     def __del__(self):
 | |
|         """
 | |
|         Make sure the file is closed, this prevents warnings on unclosed file
 | |
|         when running tests with python >= 3.2.
 | |
|         """
 | |
|         if self.fhandle and hasattr(self.fhandle, 'close'):
 | |
|             self.fhandle.close()
 | |
| 
 | |
|     def parse(self):
 | |
|         """
 | |
|         Build the instance with the file handle provided in the
 | |
|         constructor.
 | |
|         """
 | |
|         # parse magic number
 | |
|         magic_number = self._readbinary('<I', 4)
 | |
|         if magic_number == MOFile.MAGIC:
 | |
|             ii = '<II'
 | |
|         elif magic_number == MOFile.MAGIC_SWAPPED:
 | |
|             ii = '>II'
 | |
|         else:
 | |
|             raise OSError('Invalid mo file, magic number is incorrect !')
 | |
|         self.instance.magic_number = magic_number
 | |
|         # parse the version number and the number of strings
 | |
|         version, numofstrings = self._readbinary(ii, 8)
 | |
|         # from MO file format specs: "A program seeing an unexpected major
 | |
|         # revision number should stop reading the MO file entirely"
 | |
|         if version >> 16 not in (0, 1):
 | |
|             raise OSError('Invalid mo file, unexpected major revision number')
 | |
|         self.instance.version = version
 | |
|         # original strings and translation strings hash table offset
 | |
|         msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
 | |
|         # move to msgid hash table and read length and offset of msgids
 | |
|         self.fhandle.seek(msgids_hash_offset)
 | |
|         msgids_index = []
 | |
|         for i in range(numofstrings):
 | |
|             msgids_index.append(self._readbinary(ii, 8))
 | |
|         # move to msgstr hash table and read length and offset of msgstrs
 | |
|         self.fhandle.seek(msgstrs_hash_offset)
 | |
|         msgstrs_index = []
 | |
|         for i in range(numofstrings):
 | |
|             msgstrs_index.append(self._readbinary(ii, 8))
 | |
|         # build entries
 | |
|         encoding = self.instance.encoding
 | |
|         for i in range(numofstrings):
 | |
|             self.fhandle.seek(msgids_index[i][1])
 | |
|             msgid = self.fhandle.read(msgids_index[i][0])
 | |
| 
 | |
|             self.fhandle.seek(msgstrs_index[i][1])
 | |
|             msgstr = self.fhandle.read(msgstrs_index[i][0])
 | |
|             if i == 0 and not msgid:  # metadata
 | |
|                 raw_metadata, metadata = msgstr.split(b('\n')), {}
 | |
|                 for line in raw_metadata:
 | |
|                     tokens = line.split(b(':'), 1)
 | |
|                     if tokens[0] != b(''):
 | |
|                         try:
 | |
|                             k = tokens[0].decode(encoding)
 | |
|                             v = tokens[1].decode(encoding)
 | |
|                             metadata[k] = v.strip()
 | |
|                         except IndexError:
 | |
|                             metadata[k] = u('')
 | |
|                 self.instance.metadata = metadata
 | |
|                 continue
 | |
|             # test if we have a plural entry
 | |
|             msgid_tokens = msgid.split(b('\0'))
 | |
|             if len(msgid_tokens) > 1:
 | |
|                 entry = self._build_entry(
 | |
|                     msgid=msgid_tokens[0],
 | |
|                     msgid_plural=msgid_tokens[1],
 | |
|                     msgstr_plural={k: v for k, v in
 | |
|                                        enumerate(msgstr.split(b('\0')))}
 | |
|                 )
 | |
|             else:
 | |
|                 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
 | |
|             self.instance.append(entry)
 | |
|         # close opened file
 | |
|         self.fhandle.close()
 | |
|         return self.instance
 | |
| 
 | |
|     def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
 | |
|                      msgstr_plural=None):
 | |
|         msgctxt_msgid = msgid.split(b('\x04'))
 | |
|         encoding = self.instance.encoding
 | |
|         if len(msgctxt_msgid) > 1:
 | |
|             kwargs = {
 | |
|                 'msgctxt': msgctxt_msgid[0].decode(encoding),
 | |
|                 'msgid': msgctxt_msgid[1].decode(encoding),
 | |
|             }
 | |
|         else:
 | |
|             kwargs = {'msgid': msgid.decode(encoding)}
 | |
|         if msgstr:
 | |
|             kwargs['msgstr'] = msgstr.decode(encoding)
 | |
|         if msgid_plural:
 | |
|             kwargs['msgid_plural'] = msgid_plural.decode(encoding)
 | |
|         if msgstr_plural:
 | |
|             for k in msgstr_plural:
 | |
|                 msgstr_plural[k] = msgstr_plural[k].decode(encoding)
 | |
|             kwargs['msgstr_plural'] = msgstr_plural
 | |
|         return MOEntry(**kwargs)
 | |
| 
 | |
|     def _readbinary(self, fmt, numbytes):
 | |
|         """
 | |
|         Private method that unpack n bytes of data using format <fmt>.
 | |
|         It returns a tuple or a mixed value if the tuple length is 1.
 | |
|         """
 | |
|         bytes = self.fhandle.read(numbytes)
 | |
|         tup = struct.unpack(fmt, bytes)
 | |
|         if len(tup) == 1:
 | |
|             return tup[0]
 | |
|         return tup
 | |
| # }}}
 |