From f3080a42b0bfa4ba51e9afbd7d2d2cec2c17625b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 16 Nov 2007 18:17:18 +0000 Subject: [PATCH] Make memory usage minimization optional, thereby achieving up to a 10x speedup. --- src/libprs500/ebooks/lrf/__init__.py | 2 ++ src/libprs500/ebooks/lrf/html/convert_from.py | 11 ++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/libprs500/ebooks/lrf/__init__.py b/src/libprs500/ebooks/lrf/__init__.py index 64be956f9e..fb00f87606 100644 --- a/src/libprs500/ebooks/lrf/__init__.py +++ b/src/libprs500/ebooks/lrf/__init__.py @@ -218,6 +218,8 @@ def option_parser(usage): help='''Be verbose while processing''') debug.add_option('--lrs', action='store_true', dest='lrs', \ help='Convert to LRS', default=False) + parser.add_option('--minimize-memory-usage', action='store_true', default=False, + help=_('Minimize memory usage at the cost of longer processing times. Use this option if you are on a memory constrained machine.')) return parser def find_custom_fonts(options, logger): diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py index 78f478b3d7..382fac464b 100644 --- a/src/libprs500/ebooks/lrf/html/convert_from.py +++ b/src/libprs500/ebooks/lrf/html/convert_from.py @@ -648,8 +648,7 @@ class HTMLConverter(object): # Need to make a copy of contents as when # extract is called on a child, it will # mess up the iteration. 
- contents = [i for i in ptag.contents] - for c in contents: + for c in copy.copy(ptag.contents): if isinstance(c, HTMLConverter.IGNORED_TAGS): continue elif isinstance(c, Tag): @@ -658,7 +657,8 @@ class HTMLConverter(object): self.add_text(c, pcss, ppcss) if not self.in_table: try: - ptag.extract() + if self.minimize_memory_usage: + ptag.extract() except AttributeError: print ptag, type(ptag) @@ -1532,8 +1532,9 @@ class HTMLConverter(object): self.logger.debug('Bad table:\n%s', str(tag)[:300]) self.in_table = False self.process_children(tag, tag_css, tag_pseudo_css) - finally: - tag.extract() + finally: + if self.minimize_memory_usage: + tag.extract() else: self.process_children(tag, tag_css, tag_pseudo_css) if end_page: