Fix a couple of bugs in msgfmt.py

The charset was not being detected because of comments, and
some Sphinx-generated .pot files mark the first message as fuzzy,
which was causing it to not be added, leading to the .mo file
not working. Fixes #1918073 [Translated strings not in added to the User Manual](https://bugs.launchpad.net/calibre/+bug/1918073)
This commit is contained in:
Kovid Goyal 2021-03-08 18:39:39 +05:30
parent 54c56b9d0e
commit f463c1dd97
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -50,7 +50,7 @@ def usage(code, msg=''):
def add(ctxt, id, str, fuzzy): def add(ctxt, id, str, fuzzy):
"Add a non-fuzzy translation to the dictionary." "Add a non-fuzzy translation to the dictionary."
if not fuzzy and str: if (not fuzzy or not id) and str:
if id: if id:
STATS['translated'] += 1 STATS['translated'] += 1
if ctxt is None: if ctxt is None:
@ -134,6 +134,15 @@ def make(filename, outfile):
# until we know the exact encoding # until we know the exact encoding
encoding = 'latin-1' encoding = 'latin-1'
# Update the enclosing function's `encoding` from the charset declared in
# the entry just parsed, if that entry is the header (empty msgid with a
# non-empty msgstr). Reads `msgid` and `msgstr` from the enclosing scope.
def check_encoding():
# Rebind the enclosing function's variable rather than creating a local one.
nonlocal encoding
# Only an entry with an empty msgid and a non-empty msgstr is inspected.
if not msgid and msgstr:
# See whether there is an encoding declaration
p = HeaderParser()
# msgstr is bytes here; decode it with the current encoding before parsing.
charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
# Leave the current encoding untouched when no charset is declared.
if charset:
encoding = charset
# Parse the catalog # Parse the catalog
lno = 0 lno = 0
for l in lines: for l in lines:
@ -142,6 +151,7 @@ def make(filename, outfile):
# If we get a comment line after a msgstr, this is a new entry # If we get a comment line after a msgstr, this is a new entry
if l[0] == '#' and section == STR: if l[0] == '#' and section == STR:
add(msgctxt, msgid, msgstr, fuzzy) add(msgctxt, msgid, msgstr, fuzzy)
check_encoding()
section = msgctxt = None section = msgctxt = None
fuzzy = 0 fuzzy = 0
# Record a fuzzy mark # Record a fuzzy mark
@ -154,23 +164,13 @@ def make(filename, outfile):
if l.startswith('msgctxt'): if l.startswith('msgctxt'):
if section == STR: if section == STR:
add(msgctxt, msgid, msgstr, fuzzy) add(msgctxt, msgid, msgstr, fuzzy)
check_encoding()
section = CTXT section = CTXT
l = l[7:] l = l[7:]
msgctxt = b'' msgctxt = b''
elif l.startswith('msgid') and not l.startswith('msgid_plural'): elif l.startswith('msgid') and not l.startswith('msgid_plural'):
if section == STR: if section == STR:
add(msgctxt, msgid, msgstr, fuzzy) add(msgctxt, msgid, msgstr, fuzzy)
if not msgid:
# See whether there is an encoding declaration
p = HeaderParser()
if sys.version_info.major > 2:
charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
else:
charset = p.parsestr(msgstr.decode(encoding).encode('ascii', 'replace')).get_content_charset()
if isinstance(charset, bytes):
charset = charset.decode('ascii')
if charset:
encoding = charset
section = ID section = ID
l = l[5:] l = l[5:]
msgid = msgstr = b'' msgid = msgstr = b''