mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Fix #2953 (missing ì)
This commit is contained in:
parent
090b322f8d
commit
2330c2a88e
@ -303,6 +303,11 @@ class MobiReader(object):
|
||||
self.cleanup_html()
|
||||
|
||||
self.log.debug('Parsing HTML...')
|
||||
try:
|
||||
root = html.fromstring(self.processed_html)
|
||||
except:
|
||||
self.log.warning('MOBI markup appears to contain random bytes. Stripping.')
|
||||
self.processed_html = self.remove_random_bytes(self.processed_html)
|
||||
root = html.fromstring(self.processed_html)
|
||||
if root.xpath('descendant::p/descendant::p'):
|
||||
from lxml.html import soupparser
|
||||
@ -444,7 +449,10 @@ class MobiReader(object):
|
||||
self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
|
||||
self.processed_html = self.processed_html.replace('\r\n', '\n')
|
||||
self.processed_html = self.processed_html.replace('> <', '>\n<')
|
||||
self.processed_html = re.sub('\x14|\x15|\x1c|\x1d|\xef|\x12|\x13|\xec', '', self.processed_html)
|
||||
|
||||
def remove_random_bytes(self, html):
|
||||
return re.sub('\x14|\x15|\x1c|\x1d|\xef|\x12|\x13|\xec',
|
||||
'', html)
|
||||
|
||||
def ensure_unit(self, raw, unit='px'):
|
||||
if re.search(r'\d+$', raw) is not None:
|
||||
|
Loading…
x
Reference in New Issue
Block a user