From f463c1dd972dc43721730b0f6457e0b98e3bcbbd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 8 Mar 2021 18:39:39 +0530 Subject: [PATCH] Fix a couple of bugs in msgfmt.py The charset was not being detected because of comments. Also, some Sphinx-generated .pot files mark the first message as fuzzy, which caused it not to be added, leading to the .mo file not working. Fixes #1918073 [Translated strings not in added to the User Manual](https://bugs.launchpad.net/calibre/+bug/1918073) --- src/calibre/translations/msgfmt.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/calibre/translations/msgfmt.py b/src/calibre/translations/msgfmt.py index 44f92df4a9..0e9b325a96 100644 --- a/src/calibre/translations/msgfmt.py +++ b/src/calibre/translations/msgfmt.py @@ -50,7 +50,7 @@ def usage(code, msg=''): def add(ctxt, id, str, fuzzy): "Add a non-fuzzy translation to the dictionary." - if not fuzzy and str: + if (not fuzzy or not id) and str: if id: STATS['translated'] += 1 if ctxt is None: @@ -134,6 +134,15 @@ def make(filename, outfile): # until we know the exact encoding encoding = 'latin-1' + def check_encoding(): + nonlocal encoding + if not msgid and msgstr: + # See whether there is an encoding declaration + p = HeaderParser() + charset = p.parsestr(msgstr.decode(encoding)).get_content_charset() + if charset: + encoding = charset + # Parse the catalog lno = 0 for l in lines: @@ -142,6 +151,7 @@ def make(filename, outfile): # If we get a comment line after a msgstr, this is a new entry if l[0] == '#' and section == STR: add(msgctxt, msgid, msgstr, fuzzy) + check_encoding() section = msgctxt = None fuzzy = 0 # Record a fuzzy mark @@ -154,23 +164,13 @@ def make(filename, outfile): if l.startswith('msgctxt'): if section == STR: add(msgctxt, msgid, msgstr, fuzzy) + check_encoding() section = CTXT l = l[7:] msgctxt = b'' elif l.startswith('msgid') and not l.startswith('msgid_plural'): if section == STR: add(msgctxt, msgid, 
msgstr, fuzzy) - if not msgid: - # See whether there is an encoding declaration - p = HeaderParser() - if sys.version_info.major > 2: - charset = p.parsestr(msgstr.decode(encoding)).get_content_charset() - else: - charset = p.parsestr(msgstr.decode(encoding).encode('ascii', 'replace')).get_content_charset() - if isinstance(charset, bytes): - charset = charset.decode('ascii') - if charset: - encoding = charset section = ID l = l[5:] msgid = msgstr = b''