From 9a79f6f4a3360bdd3c1ab0898ca2275237171227 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 17 Jan 2008 16:11:52 +0000
Subject: [PATCH] Improved character encoding detection for html2lrf

---
 src/libprs500/ebooks/lrf/html/convert_from.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py
index 5959b752bc..e1908737d8 100644
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@@ -33,7 +33,8 @@ except ImportError:
     import Image as PILImage
 
 from libprs500.ebooks.BeautifulSoup import BeautifulSoup, Comment, Tag, \
-                            NavigableString, Declaration, ProcessingInstruction
+                            NavigableString, Declaration, ProcessingInstruction, \
+                            UnicodeDammit
 from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \
                 TextBlock, ImageBlock, JumpButton, CharButton, \
                 Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas, DropCaps, \
@@ -348,7 +349,7 @@ class HTMLConverter(object):
         upath = path.encode('utf-8') if isinstance(path, unicode) else path
         if not os.path.exists(upath):
             upath = upath.replace('&', '%26') #convertlit replaces & with %26 in file names 
-        raw = open(upath, 'rb').read()
+        raw = UnicodeDammit(open(upath, 'rb').read()).unicode
         soup = self.preprocess(raw)
         self.logger.info('\tConverting to BBeB...')
         self.current_page = None