Make memory usage minimization optional, thereby achieving up to a 10x speedup.

This commit is contained in:
Kovid Goyal 2007-11-16 18:17:18 +00:00
parent bf5b924058
commit f3080a42b0
2 changed files with 8 additions and 5 deletions

View File

@ -218,6 +218,8 @@ def option_parser(usage):
help='''Be verbose while processing''') help='''Be verbose while processing''')
debug.add_option('--lrs', action='store_true', dest='lrs', \ debug.add_option('--lrs', action='store_true', dest='lrs', \
help='Convert to LRS', default=False) help='Convert to LRS', default=False)
parser.add_option('--minimize-memory-usage', action='store_true', default=False,
help=_('Minimize memory usage at the cost of longer processing times. Use this option if you are on a memory constrained machine.'))
return parser return parser
def find_custom_fonts(options, logger): def find_custom_fonts(options, logger):

View File

@ -648,8 +648,7 @@ class HTMLConverter(object):
# Need to make a copy of contents as when # Need to make a copy of contents as when
# extract is called on a child, it will # extract is called on a child, it will
# mess up the iteration. # mess up the iteration.
contents = [i for i in ptag.contents] for c in copy.copy(ptag.contents):
for c in contents:
if isinstance(c, HTMLConverter.IGNORED_TAGS): if isinstance(c, HTMLConverter.IGNORED_TAGS):
continue continue
elif isinstance(c, Tag): elif isinstance(c, Tag):
@ -658,7 +657,8 @@ class HTMLConverter(object):
self.add_text(c, pcss, ppcss) self.add_text(c, pcss, ppcss)
if not self.in_table: if not self.in_table:
try: try:
ptag.extract() if self.minimize_memory_usage:
ptag.extract()
except AttributeError: except AttributeError:
print ptag, type(ptag) print ptag, type(ptag)
@ -1532,8 +1532,9 @@ class HTMLConverter(object):
self.logger.debug('Bad table:\n%s', str(tag)[:300]) self.logger.debug('Bad table:\n%s', str(tag)[:300])
self.in_table = False self.in_table = False
self.process_children(tag, tag_css, tag_pseudo_css) self.process_children(tag, tag_css, tag_pseudo_css)
finally: finally:
tag.extract() if self.minimize_memory_usage:
tag.extract()
else: else:
self.process_children(tag, tag_css, tag_pseudo_css) self.process_children(tag, tag_css, tag_pseudo_css)
if end_page: if end_page: