Fix handling of encoding: autodetect the input text encoding when --encoding is not given

2025-11-03 19:17:02 -05:00 · 2007-07-26 17:19:38 +00:00 · 2007-07-26 17:19:38 +00:00 · a69e3991b3
commit a69e3991b3
parent 5b7416ff84
1 changed file with 18 additions and 19 deletions
--- a/src/libprs500/ebooks/lrf/txt/convert_from.py
+++ b/src/libprs500/ebooks/lrf/txt/convert_from.py
@@ -27,11 +27,10 @@ from libprs500.ebooks.markdown import markdown
 def option_parser():
    parser = lrf_option_parser('''Usage: %prog [options] mybook.txt\n\n'''
        '''%prog converts mybook.txt to mybook.lrf''')
    defenc = 'cp1252' if iswindows else 'utf8'
    enchelp = 'Set the encoding used to decode ' + \
-              'the text in mybook.txt. Default encoding is %default'
+              'the text in mybook.txt. Default is to try to autodetect.'
    parser.add_option('-e', '--encoding', action='store', type='string', \
-                      dest='encoding', help=enchelp, default=defenc)
+                      dest='encoding', help=enchelp, default=None)
    return parser
@@ -40,27 +39,27 @@ def generate_html(txtfile, encoding):
    Convert txtfile to html and return a PersistentTemporaryFile object pointing
    to the file with the HTML.
    '''
-    encodings = ['iso-8859-1', 'koi8_r', 'koi8_u', 'utf8']
+    enc = encoding
-    if iswindows:
+    if not encoding:
-        encodings = ['cp1252'] + encodings
+        encodings = ['cp1252', 'latin-1', 'iso-8859-1', 'koi8_r', 'koi8_u', 'utf8']
-    if encoding not in ['cp1252', 'utf8']:
+        txt, enc = None, None
-        encodings = [encoding] + encodings
+        for encoding in encodings:
-    txt, enc = None, None
+            try:
-    for encoding in encodings:
+                txt = codecs.open(txtfile, 'rb', encoding).read()
-        try:
+            except UnicodeDecodeError:
-            txt = codecs.open(txtfile, 'rb', encoding).read()
+                continue
-        except UnicodeDecodeError:
+            enc = encoding
-            continue
+            break
-        enc = encoding
+        if txt == None:
-        break
+            raise ConversionError, 'Could not detect encoding of %s'%(txtfile,)
-    if txt == None:
+    else:
-        raise ConversionError, 'Could not detect encoding of %s'%(txtfile,)
+        txt = codecs.open(txtfile, 'rb', enc).read()
    md = markdown.Markdown(txt,
                           extensions=['footnotes', 'tables', 'toc'],
                           encoding=enc,
                           safe_mode=False,
                           )
-    html = md.toString().decode(enc)
+    html = md.toString()
    p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile))
    p.close()
    codecs.open(p.name, 'wb', enc).write(html)