mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improve performance of large LRF files on the SONY Reader and make html2lrf more robust when handling <script/> tags
This commit is contained in:
commit
30a11fdfaa
@ -32,7 +32,6 @@ from calibre.ebooks.lrf.html.table import Table
|
|||||||
from calibre import filename_to_utf8, setup_cli_handlers, __appname__, \
|
from calibre import filename_to_utf8, setup_cli_handlers, __appname__, \
|
||||||
fit_image, LoggingInterface, preferred_encoding
|
fit_image, LoggingInterface, preferred_encoding
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre.ebooks.metadata.opf import OPFReader
|
|
||||||
from calibre.devices.interface import Device
|
from calibre.devices.interface import Device
|
||||||
from calibre.ebooks.lrf.html.color_map import lrs_color
|
from calibre.ebooks.lrf.html.color_map import lrs_color
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
@ -106,6 +105,8 @@ class HTMLConverter(object, LoggingInterface):
|
|||||||
(re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL),
|
(re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL),
|
||||||
strip_style_comments),
|
strip_style_comments),
|
||||||
|
|
||||||
|
# Remove self closing script tags as they also mess up BeautifulSoup
|
||||||
|
(re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
|
||||||
|
|
||||||
]
|
]
|
||||||
# Fix Baen markup
|
# Fix Baen markup
|
||||||
@ -334,7 +335,8 @@ class HTMLConverter(object, LoggingInterface):
|
|||||||
soup = BeautifulSoup(raw,
|
soup = BeautifulSoup(raw,
|
||||||
convertEntities=BeautifulSoup.XHTML_ENTITIES,
|
convertEntities=BeautifulSoup.XHTML_ENTITIES,
|
||||||
markupMassage=nmassage)
|
markupMassage=nmassage)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
if not self.baen and self.is_baen(soup):
|
if not self.baen and self.is_baen(soup):
|
||||||
self.baen = True
|
self.baen = True
|
||||||
self.log_info(_('\tBaen file detected. Re-parsing...'))
|
self.log_info(_('\tBaen file detected. Re-parsing...'))
|
||||||
|
@ -1432,7 +1432,7 @@ class Page(LrsObject, LrsContainer):
|
|||||||
|
|
||||||
#print "page contents:", pageContent
|
#print "page contents:", pageContent
|
||||||
# ObjectList not needed and causes slowdown in SONY LRF renderer
|
# ObjectList not needed and causes slowdown in SONY LRF renderer
|
||||||
p.appendLrfTag(LrfTag("ObjectList", pageContent))
|
#p.appendLrfTag(LrfTag("ObjectList", pageContent))
|
||||||
p.appendLrfTag(LrfTag("Link", self.pageStyle.objId))
|
p.appendLrfTag(LrfTag("Link", self.pageStyle.objId))
|
||||||
p.appendLrfTag(LrfTag("ParentPageTree", lrfWriter.getPageTreeId()))
|
p.appendLrfTag(LrfTag("ParentPageTree", lrfWriter.getPageTreeId()))
|
||||||
p.appendTagDict(self.settings)
|
p.appendTagDict(self.settings)
|
||||||
|
@ -27,7 +27,6 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <strings.h>
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#ifdef DEBUG_PERF
|
#ifdef DEBUG_PERF
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user