Improve performance of large LRF files on the SONY Reader and make html2lrf more robust when handling <script/> tags

This commit is contained in:
Kovid Goyal 2008-12-18 11:34:53 -08:00
commit 30a11fdfaa
3 changed files with 5 additions and 4 deletions

View File

@@ -32,7 +32,6 @@ from calibre.ebooks.lrf.html.table import Table
from calibre import filename_to_utf8, setup_cli_handlers, __appname__, \ from calibre import filename_to_utf8, setup_cli_handlers, __appname__, \
fit_image, LoggingInterface, preferred_encoding fit_image, LoggingInterface, preferred_encoding
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.ebooks.metadata.opf import OPFReader
from calibre.devices.interface import Device from calibre.devices.interface import Device
from calibre.ebooks.lrf.html.color_map import lrs_color from calibre.ebooks.lrf.html.color_map import lrs_color
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
@@ -106,6 +105,8 @@ class HTMLConverter(object, LoggingInterface):
(re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL), (re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL),
strip_style_comments), strip_style_comments),
# Remove self closing script tags as they also mess up BeautifulSoup
(re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
] ]
# Fix Baen markup # Fix Baen markup
@@ -334,7 +335,8 @@ class HTMLConverter(object, LoggingInterface):
soup = BeautifulSoup(raw, soup = BeautifulSoup(raw,
convertEntities=BeautifulSoup.XHTML_ENTITIES, convertEntities=BeautifulSoup.XHTML_ENTITIES,
markupMassage=nmassage) markupMassage=nmassage)
else:
raise
if not self.baen and self.is_baen(soup): if not self.baen and self.is_baen(soup):
self.baen = True self.baen = True
self.log_info(_('\tBaen file detected. Re-parsing...')) self.log_info(_('\tBaen file detected. Re-parsing...'))

View File

@@ -1432,7 +1432,7 @@ class Page(LrsObject, LrsContainer):
#print "page contents:", pageContent #print "page contents:", pageContent
# ObjectList not needed and causes slowdown in SONY LRF renderer # ObjectList not needed and causes slowdown in SONY LRF renderer
p.appendLrfTag(LrfTag("ObjectList", pageContent)) #p.appendLrfTag(LrfTag("ObjectList", pageContent))
p.appendLrfTag(LrfTag("Link", self.pageStyle.objId)) p.appendLrfTag(LrfTag("Link", self.pageStyle.objId))
p.appendLrfTag(LrfTag("ParentPageTree", lrfWriter.getPageTreeId())) p.appendLrfTag(LrfTag("ParentPageTree", lrfWriter.getPageTreeId()))
p.appendTagDict(self.settings) p.appendTagDict(self.settings)

View File

@@ -27,7 +27,6 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <assert.h> #include <assert.h>
#include <strings.h>
#include <string.h> #include <string.h>
#ifdef DEBUG_PERF #ifdef DEBUG_PERF
#include <sys/time.h> #include <sys/time.h>