mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Reduce memory usage of html2lrf. This commit could have introduced many regressions.
This commit is contained in:
parent
f95eead55c
commit
3d007aacf1
@ -299,13 +299,6 @@ class HTMLConverter(object):
|
|||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
class Link(object):
|
|
||||||
def __init__(self, para, tag):
|
|
||||||
self.para = para
|
|
||||||
self.tag = tag
|
|
||||||
|
|
||||||
processed_files = {} #: Files that have been processed
|
|
||||||
|
|
||||||
def __hasattr__(self, attr):
|
def __hasattr__(self, attr):
|
||||||
if hasattr(self.options, attr):
|
if hasattr(self.options, attr):
|
||||||
return True
|
return True
|
||||||
@ -322,9 +315,28 @@ class HTMLConverter(object):
|
|||||||
else:
|
else:
|
||||||
object.__setattr__(self, attr, val)
|
object.__setattr__(self, attr, val)
|
||||||
|
|
||||||
def __init__(self, book, fonts, path, options, logger,
|
CSS = {
|
||||||
link_level=0, is_root=True,
|
'h1' : {"font-size" : "xx-large", "font-weight":"bold", 'text-indent':'0pt'},
|
||||||
rotated_images={}, scaled_images={}, images={}, memory=[]):
|
'h2' : {"font-size" : "x-large", "font-weight":"bold", 'text-indent':'0pt'},
|
||||||
|
'h3' : {"font-size" : "large", "font-weight":"bold", 'text-indent':'0pt'},
|
||||||
|
'h4' : {"font-size" : "large", 'text-indent':'0pt'},
|
||||||
|
'h5' : {"font-weight" : "bold", 'text-indent':'0pt'},
|
||||||
|
'b' : {"font-weight" : "bold"},
|
||||||
|
'strong' : {"font-weight" : "bold"},
|
||||||
|
'i' : {"font-style" : "italic"},
|
||||||
|
'cite' : {'font-style' : 'italic'},
|
||||||
|
'em' : {"font-style" : "italic"},
|
||||||
|
'small' : {'font-size' : 'small'},
|
||||||
|
'pre' : {'font-family' : 'monospace' },
|
||||||
|
'code' : {'font-family' : 'monospace' },
|
||||||
|
'tt' : {'font-family' : 'monospace'},
|
||||||
|
'center' : {'text-align' : 'center'},
|
||||||
|
'th' : {'font-size' : 'large', 'font-weight':'bold'},
|
||||||
|
'big' : {'font-size' : 'large', 'font-weight':'bold'},
|
||||||
|
'.libprs500_dropcaps' : {'font-size': 'xx-large'},
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self, book, fonts, options, logger, path):
|
||||||
'''
|
'''
|
||||||
Convert HTML file at C{path} and add it to C{book}. After creating
|
Convert HTML file at C{path} and add it to C{book}. After creating
|
||||||
the object, you must call L{self.process_links} on it to create the links and
|
the object, you must call L{self.process_links} on it to create the links and
|
||||||
@ -333,57 +345,43 @@ class HTMLConverter(object):
|
|||||||
@param book: The LRF book
|
@param book: The LRF book
|
||||||
@type book: L{libprs500.lrf.pylrs.Book}
|
@type book: L{libprs500.lrf.pylrs.Book}
|
||||||
@param fonts: dict specifying the font families to use
|
@param fonts: dict specifying the font families to use
|
||||||
@param path: path to the HTML file to process
|
|
||||||
@type path: C{str}
|
|
||||||
'''
|
'''
|
||||||
# Defaults for various formatting tags
|
# Defaults for various formatting tags
|
||||||
object.__setattr__(self, 'options', options)
|
object.__setattr__(self, 'options', options)
|
||||||
self.css = dict(
|
|
||||||
h1 = {"font-size" : "xx-large", "font-weight":"bold", 'text-indent':'0pt'},
|
|
||||||
h2 = {"font-size" : "x-large", "font-weight":"bold", 'text-indent':'0pt'},
|
|
||||||
h3 = {"font-size" : "large", "font-weight":"bold", 'text-indent':'0pt'},
|
|
||||||
h4 = {"font-size" : "large", 'text-indent':'0pt'},
|
|
||||||
h5 = {"font-weight" : "bold", 'text-indent':'0pt'},
|
|
||||||
b = {"font-weight" : "bold"},
|
|
||||||
strong = {"font-weight" : "bold"},
|
|
||||||
i = {"font-style" : "italic"},
|
|
||||||
cite = {'font-style' : 'italic'},
|
|
||||||
em = {"font-style" : "italic"},
|
|
||||||
small = {'font-size' : 'small'},
|
|
||||||
pre = {'font-family' : 'monospace' },
|
|
||||||
code = {'font-family' : 'monospace' },
|
|
||||||
tt = {'font-family' : 'monospace'},
|
|
||||||
center = {'text-align' : 'center'},
|
|
||||||
th = {'font-size' : 'large', 'font-weight':'bold'},
|
|
||||||
big = {'font-size' : 'large', 'font-weight':'bold'},
|
|
||||||
)
|
|
||||||
self.css['.libprs500_dropcaps'] = {'font-size': 'xx-large'}
|
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.fonts = fonts #: dict specifting font families to use
|
self.fonts = fonts #: dict specifying font families to use
|
||||||
self.scaled_images = scaled_images #: Temporary files with scaled version of images
|
# Memory
|
||||||
self.rotated_images = rotated_images #: Temporary files with rotated version of images
|
self.scaled_images = {} #: Temporary files with scaled version of images
|
||||||
self.link_level = link_level #: Current link level
|
self.rotated_images = {} #: Temporary files with rotated version of images
|
||||||
|
self.text_styles = []#: Keep track of already used textstyles
|
||||||
|
self.block_styles = []#: Keep track of already used blockstyles
|
||||||
|
self.images = {} #: Images referenced in the HTML document
|
||||||
|
self.targets = {} #: <a name=...> and id elements
|
||||||
|
self.links = {} #: <a href=...> elements
|
||||||
|
self.processed_files = []
|
||||||
|
self.link_level = 0 #: Current link level
|
||||||
|
self.memory = [] #: Used to ensure that duplicate CSS unhandled erros are not reported
|
||||||
|
self.tops = {} #: element representing the top of each HTML file in the LRF file
|
||||||
|
# Styles
|
||||||
self.blockquote_style = book.create_block_style(sidemargin=60,
|
self.blockquote_style = book.create_block_style(sidemargin=60,
|
||||||
topskip=20, footskip=20)
|
topskip=20, footskip=20)
|
||||||
self.unindented_style = book.create_text_style(parindent=0)
|
self.unindented_style = book.create_text_style(parindent=0)
|
||||||
self.text_styles = []#: Keep track of already used textstyles
|
|
||||||
self.block_styles = []#: Keep track of already used blockstyles
|
|
||||||
self.images = images #: Images referenced in the HTML document
|
|
||||||
self.targets = {} #: <a name=...> elements
|
|
||||||
self.links = [] #: <a href=...> elements
|
|
||||||
self.files = {} #: links that point to other files
|
|
||||||
self.links_processed = False #: Whether links_processed has been called on this object
|
|
||||||
# Set by table processing code so that any <a name> within the table
|
# Set by table processing code so that any <a name> within the table
|
||||||
# point to the previous element
|
# point to the previous element
|
||||||
self.anchor_to_previous = None
|
self.anchor_to_previous = None
|
||||||
self.in_table = False
|
self.in_table = False
|
||||||
|
# List processing
|
||||||
self.list_level = 0
|
self.list_level = 0
|
||||||
self.list_indent = 20
|
self.list_indent = 20
|
||||||
self.list_counter = 1
|
self.list_counter = 1
|
||||||
self.memory = memory #: Used to ensure that duplicate CSS unhandled erros are not reported
|
|
||||||
self.book = book #: The Book object representing a BBeB book
|
self.book = book #: The Book object representing a BBeB book
|
||||||
self.is_root = is_root #: Are we converting the root HTML file
|
|
||||||
self.lstrip_toggle = False #: If true the next add_text call will do an lstrip
|
self.lstrip_toggle = False #: If true the next add_text call will do an lstrip
|
||||||
|
self.start_on_file(path, is_root=True)
|
||||||
|
|
||||||
|
def start_on_file(self, path, is_root=True, link_level=0):
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
os.chdir(os.path.dirname(path))
|
os.chdir(os.path.dirname(path))
|
||||||
self.file_name = os.path.basename(path)
|
self.file_name = os.path.basename(path)
|
||||||
@ -398,20 +396,25 @@ class HTMLConverter(object):
|
|||||||
if self.pdftohtml:
|
if self.pdftohtml:
|
||||||
nmassage.extend(HTMLConverter.PDFTOHTML)
|
nmassage.extend(HTMLConverter.PDFTOHTML)
|
||||||
raw = unicode(raw, 'utf8', 'replace')
|
raw = unicode(raw, 'utf8', 'replace')
|
||||||
self.soup = BeautifulSoup(raw,
|
soup = BeautifulSoup(raw,
|
||||||
convertEntities=BeautifulSoup.HTML_ENTITIES,
|
convertEntities=BeautifulSoup.HTML_ENTITIES,
|
||||||
markupMassage=nmassage)
|
markupMassage=nmassage)
|
||||||
logger.info('\tConverting to BBeB...')
|
self.logger.info('\tConverting to BBeB...')
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
self.current_page = None
|
self.current_page = None
|
||||||
self.current_para = None
|
self.current_para = None
|
||||||
self.current_style = {}
|
self.current_style = {}
|
||||||
self.page_break_found = False
|
self.page_break_found = False
|
||||||
match = self.PAGE_BREAK_PAT.search(unicode(self.soup))
|
match = self.PAGE_BREAK_PAT.search(unicode(soup))
|
||||||
if match and not re.match('avoid', match.group(1), re.IGNORECASE):
|
if match and not re.match('avoid', match.group(1), re.IGNORECASE):
|
||||||
self.page_break_found = True
|
self.page_break_found = True
|
||||||
self.parse_file()
|
self.css = HTMLConverter.CSS.copy()
|
||||||
HTMLConverter.processed_files[path] = self
|
self.target_prefix = path
|
||||||
|
self.links[path] = []
|
||||||
|
self.tops[path] = self.parse_file(soup, is_root)
|
||||||
|
self.processed_files.append(path)
|
||||||
|
self.process_links(is_root, path)
|
||||||
|
|
||||||
|
|
||||||
def parse_css(self, style):
|
def parse_css(self, style):
|
||||||
"""
|
"""
|
||||||
@ -480,7 +483,7 @@ class HTMLConverter(object):
|
|||||||
prop.update(self.parse_style_properties(tag["style"]))
|
prop.update(self.parse_style_properties(tag["style"]))
|
||||||
return prop
|
return prop
|
||||||
|
|
||||||
def parse_file(self):
|
def parse_file(self, soup, is_root):
|
||||||
def get_valid_block(page):
|
def get_valid_block(page):
|
||||||
for item in page.contents:
|
for item in page.contents:
|
||||||
if isinstance(item, (Canvas, TextBlock, ImageBlock, RuledLine)):
|
if isinstance(item, (Canvas, TextBlock, ImageBlock, RuledLine)):
|
||||||
@ -489,11 +492,11 @@ class HTMLConverter(object):
|
|||||||
self.current_page = self.book.create_page()
|
self.current_page = self.book.create_page()
|
||||||
self.current_block = self.book.create_text_block()
|
self.current_block = self.book.create_text_block()
|
||||||
self.current_para = Paragraph()
|
self.current_para = Paragraph()
|
||||||
if self.cover and self.is_root:
|
if self.cover and is_root:
|
||||||
self.add_image_page(self.cover)
|
self.add_image_page(self.cover)
|
||||||
self.top = self.current_block
|
top = self.current_block
|
||||||
|
|
||||||
self.process_children(self.soup, {})
|
self.process_children(soup, {})
|
||||||
|
|
||||||
if self.current_para and self.current_block:
|
if self.current_para and self.current_block:
|
||||||
self.current_para.append_to(self.current_block)
|
self.current_para.append_to(self.current_block)
|
||||||
@ -502,16 +505,16 @@ class HTMLConverter(object):
|
|||||||
if self.current_page and self.current_page.has_text():
|
if self.current_page and self.current_page.has_text():
|
||||||
self.book.append(self.current_page)
|
self.book.append(self.current_page)
|
||||||
|
|
||||||
if not self.top.parent:
|
if not top.parent:
|
||||||
if not previous:
|
if not previous:
|
||||||
try:
|
try:
|
||||||
previous = self.book.pages()[0]
|
previous = self.book.pages()[0]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise ConversionError, self.file_name + ' does not seem to have any content'
|
raise ConversionError, self.file_name + ' does not seem to have any content'
|
||||||
self.top = get_valid_block(previous)
|
top = get_valid_block(previous)
|
||||||
if not self.top or not self.top.parent:
|
if not top or not top.parent:
|
||||||
raise ConversionError, self.file_name + ' does not seem to have any content'
|
raise ConversionError, self.file_name + ' does not seem to have any content'
|
||||||
return
|
return top
|
||||||
|
|
||||||
found = False
|
found = False
|
||||||
for page in self.book.pages():
|
for page in self.book.pages():
|
||||||
@ -519,15 +522,30 @@ class HTMLConverter(object):
|
|||||||
found = True
|
found = True
|
||||||
continue
|
continue
|
||||||
if found:
|
if found:
|
||||||
self.top = get_valid_block(page)
|
top = get_valid_block(page)
|
||||||
if not self.top:
|
if not top:
|
||||||
continue
|
continue
|
||||||
break
|
break
|
||||||
|
|
||||||
if not self.top or not self.top.parent:
|
if not top or not top.parent:
|
||||||
raise ConversionError, 'Could not parse ' + self.file_name
|
raise ConversionError, 'Could not parse ' + self.file_name
|
||||||
|
return top
|
||||||
|
|
||||||
|
def create_link(self, para, tag):
|
||||||
|
text = self.get_text(tag, 1000)
|
||||||
|
if not text:
|
||||||
|
text = 'Link'
|
||||||
|
img = tag.find('img')
|
||||||
|
if img:
|
||||||
|
try:
|
||||||
|
text = img['alt']
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
url = urlparse(tag['href'])
|
||||||
|
return {'para':para, 'text':text, 'url':url}
|
||||||
|
|
||||||
|
|
||||||
def get_text(self, tag, limit=None):
|
def get_text(self, tag, limit=None):
|
||||||
css = self.tag_css(tag)
|
css = self.tag_css(tag)
|
||||||
if (css.has_key('display') and css['display'].lower() == 'none') or \
|
if (css.has_key('display') and css['display'].lower() == 'none') or \
|
||||||
@ -548,7 +566,7 @@ class HTMLConverter(object):
|
|||||||
text += self.get_text(c)
|
text += self.get_text(c)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def process_links(self):
|
def process_links(self, is_root, selfpath, link_level=0):
|
||||||
def add_toc_entry(text, target):
|
def add_toc_entry(text, target):
|
||||||
# TextBlocks in Canvases have a None parent or an Objects Parent
|
# TextBlocks in Canvases have a None parent or an Objects Parent
|
||||||
if target.parent != None and \
|
if target.parent != None and \
|
||||||
@ -590,85 +608,39 @@ class HTMLConverter(object):
|
|||||||
page.contents.remove(bs)
|
page.contents.remove(bs)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
cwd = os.getcwd()
|
cwd = os.getcwd()
|
||||||
for link in self.links:
|
for link in self.links[selfpath]:
|
||||||
para, tag = link.para, link.tag
|
try:
|
||||||
text = self.get_text(tag, 1000)
|
para, text, purl = link['para'], link['text'], link['url']
|
||||||
# Needed for TOC entries due to bug in LRF
|
# Needed for TOC entries due to bug in LRF
|
||||||
ascii_text = text.encode('ascii', 'replace')
|
ascii_text = text.encode('ascii', 'replace')
|
||||||
if not text:
|
if purl[1]: # Not a link to a file on the local filesystem
|
||||||
text = 'Link'
|
|
||||||
img = tag.find('img')
|
|
||||||
if img:
|
|
||||||
try:
|
|
||||||
text = img['alt']
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
purl = urlparse(link.tag['href'])
|
|
||||||
if purl[1]: # Not a link to a file on the local filesystem
|
|
||||||
continue
|
|
||||||
path, fragment = unquote(purl[2]), purl[5]
|
|
||||||
if not path or os.path.basename(path) == self.file_name:
|
|
||||||
if fragment in self.targets.keys():
|
|
||||||
tb = get_target_block(fragment, self.targets)
|
|
||||||
if self.is_root:
|
|
||||||
add_toc_entry(ascii_text, tb)
|
|
||||||
sys.stdout.flush()
|
|
||||||
jb = JumpButton(tb)
|
|
||||||
self.book.append(jb)
|
|
||||||
cb = CharButton(jb, text=text)
|
|
||||||
para.contents = []
|
|
||||||
para.append(cb)
|
|
||||||
elif self.link_level < self.link_levels:
|
|
||||||
try: # os.access raises Exceptions in path has null bytes
|
|
||||||
if not os.access(path.encode('utf8', 'replace'), os.R_OK):
|
|
||||||
continue
|
|
||||||
except Exception:
|
|
||||||
self.logger.exception('Skipping %s', link)
|
|
||||||
continue
|
continue
|
||||||
path = os.path.abspath(path)
|
basepath, fragment = unquote(purl[2]), purl[5]
|
||||||
if not path in HTMLConverter.processed_files.keys():
|
if not basepath:
|
||||||
try:
|
basepath = selfpath
|
||||||
self.files[path] = HTMLConverter(
|
path = os.path.abspath(basepath)
|
||||||
self.book, self.fonts, path, self.options,
|
if link_level < self.link_levels and path not in self.processed_files:
|
||||||
self.logger,
|
try:
|
||||||
link_level = self.link_level+1,
|
self.start_on_file(path, is_root=False, link_level=link_level+1)
|
||||||
is_root = False,
|
|
||||||
rotated_images=self.rotated_images,
|
|
||||||
scaled_images=self.scaled_images,
|
|
||||||
images=self.images,
|
|
||||||
memory=self.memory)
|
|
||||||
HTMLConverter.processed_files[path] = self.files[path]
|
|
||||||
except Exception:
|
except Exception:
|
||||||
self.logger.warning('Unable to process %s', path)
|
self.logger.warning('Unable to process %s', path)
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
self.logger.exception(' ')
|
self.logger.exception(' ')
|
||||||
continue
|
continue
|
||||||
finally:
|
finally:
|
||||||
os.chdir(cwd)
|
os.chdir(cwd)
|
||||||
|
if path+fragment in self.targets.keys():
|
||||||
|
tb = get_target_block(path+fragment, self.targets)
|
||||||
else:
|
else:
|
||||||
self.files[path] = HTMLConverter.processed_files[path]
|
tb = self.tops[path]
|
||||||
conv = self.files[path]
|
if is_root:
|
||||||
if fragment in conv.targets.keys():
|
|
||||||
tb = get_target_block(fragment, conv.targets)
|
|
||||||
else:
|
|
||||||
tb = conv.top
|
|
||||||
if self.is_root:
|
|
||||||
add_toc_entry(ascii_text, tb)
|
add_toc_entry(ascii_text, tb)
|
||||||
jb = JumpButton(tb)
|
jb = JumpButton(tb)
|
||||||
self.book.append(jb)
|
self.book.append(jb)
|
||||||
cb = CharButton(jb, text=text)
|
cb = CharButton(jb, text=text)
|
||||||
para.contents = []
|
para.contents = []
|
||||||
para.append(cb)
|
para.append(cb)
|
||||||
|
|
||||||
self.links_processed = True
|
|
||||||
|
|
||||||
for path in self.files.keys():
|
|
||||||
if self.files[path].links_processed:
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
os.chdir(os.path.dirname(path))
|
|
||||||
self.files[path].process_links()
|
|
||||||
finally:
|
finally:
|
||||||
os.chdir(cwd)
|
os.chdir(cwd)
|
||||||
|
|
||||||
@ -704,13 +676,18 @@ class HTMLConverter(object):
|
|||||||
|
|
||||||
def process_children(self, ptag, pcss):
|
def process_children(self, ptag, pcss):
|
||||||
""" Process the children of ptag """
|
""" Process the children of ptag """
|
||||||
for c in ptag.contents:
|
# Need to make a copy of contents as when
|
||||||
|
# extract is called on a child, it will
|
||||||
|
# mess up the iteration.
|
||||||
|
contents = [i for i in ptag.contents]
|
||||||
|
for c in contents:
|
||||||
if isinstance(c, HTMLConverter.IGNORED_TAGS):
|
if isinstance(c, HTMLConverter.IGNORED_TAGS):
|
||||||
continue
|
continue
|
||||||
elif isinstance(c, Tag):
|
elif isinstance(c, Tag):
|
||||||
self.parse_tag(c, pcss)
|
self.parse_tag(c, pcss)
|
||||||
elif isinstance(c, NavigableString):
|
elif isinstance(c, NavigableString):
|
||||||
self.add_text(c, pcss)
|
self.add_text(c, pcss)
|
||||||
|
ptag.extract()
|
||||||
|
|
||||||
def process_alignment(self, css):
|
def process_alignment(self, css):
|
||||||
'''
|
'''
|
||||||
@ -991,22 +968,22 @@ class HTMLConverter(object):
|
|||||||
if not text.strip():
|
if not text.strip():
|
||||||
text = "Link"
|
text = "Link"
|
||||||
self.add_text(text, tag_css)
|
self.add_text(text, tag_css)
|
||||||
self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
|
self.links[self.target_prefix].append(self.create_link(self.current_para.contents[-1], tag))
|
||||||
if tag.has_key('id') or tag.has_key('name'):
|
if tag.has_key('id') or tag.has_key('name'):
|
||||||
key = 'name' if tag.has_key('name') else 'id'
|
key = 'name' if tag.has_key('name') else 'id'
|
||||||
self.targets[tag[key]] = self.current_block
|
self.targets[self.target_prefix+tag[key]] = self.current_block
|
||||||
elif tag.has_key('name') or tag.has_key('id'):
|
elif tag.has_key('name') or tag.has_key('id'):
|
||||||
key = 'name' if tag.has_key('name') else 'id'
|
key = 'name' if tag.has_key('name') else 'id'
|
||||||
if self.anchor_to_previous:
|
if self.anchor_to_previous:
|
||||||
self.process_children(tag, tag_css)
|
self.process_children(tag, tag_css)
|
||||||
for c in self.anchor_to_previous.contents:
|
for c in self.anchor_to_previous.contents:
|
||||||
if isinstance(c, (TextBlock, ImageBlock)):
|
if isinstance(c, (TextBlock, ImageBlock)):
|
||||||
self.targets[tag[key]] = c
|
self.targets[self.target_prefix+tag[key]] = c
|
||||||
return
|
return
|
||||||
tb = self.book.create_text_block()
|
tb = self.book.create_text_block()
|
||||||
tb.Paragraph(" ")
|
tb.Paragraph(" ")
|
||||||
self.anchor_to_previous.append(tb)
|
self.anchor_to_previous.append(tb)
|
||||||
self.targets[tag[key]] = tb
|
self.targets[self.target_prefix+tag[key]] = tb
|
||||||
return
|
return
|
||||||
previous = self.current_block
|
previous = self.current_block
|
||||||
self.process_children(tag, tag_css)
|
self.process_children(tag, tag_css)
|
||||||
@ -1047,7 +1024,7 @@ class HTMLConverter(object):
|
|||||||
else:
|
else:
|
||||||
target = BlockSpace()
|
target = BlockSpace()
|
||||||
self.current_page.append(target)
|
self.current_page.append(target)
|
||||||
self.targets[tag[key]] = target
|
self.targets[self.target_prefix+tag[key]] = target
|
||||||
elif tagname == 'img':
|
elif tagname == 'img':
|
||||||
if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK):
|
if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK):
|
||||||
path = os.path.abspath(unquote(tag['src']))
|
path = os.path.abspath(unquote(tag['src']))
|
||||||
@ -1202,7 +1179,7 @@ class HTMLConverter(object):
|
|||||||
if tag.has_key('id'):
|
if tag.has_key('id'):
|
||||||
target = self.book.create_text_block(textStyle=self.current_block.textStyle,
|
target = self.book.create_text_block(textStyle=self.current_block.textStyle,
|
||||||
blockStyle=self.current_block.blockStyle)
|
blockStyle=self.current_block.blockStyle)
|
||||||
self.targets[tag['id']] = target
|
self.targets[self.target_prefix+tag['id']] = target
|
||||||
self.end_current_block()
|
self.end_current_block()
|
||||||
self.current_page.append(target)
|
self.current_page.append(target)
|
||||||
src = self.get_text(tag, limit=1000)
|
src = self.get_text(tag, limit=1000)
|
||||||
@ -1371,8 +1348,7 @@ def process_file(path, options, logger=None):
|
|||||||
fpba = ['$', '', '$']
|
fpba = ['$', '', '$']
|
||||||
options.force_page_break_attr = [re.compile(fpba[0], re.IGNORECASE), fpba[1],
|
options.force_page_break_attr = [re.compile(fpba[0], re.IGNORECASE), fpba[1],
|
||||||
re.compile(fpba[2], re.IGNORECASE)]
|
re.compile(fpba[2], re.IGNORECASE)]
|
||||||
conv = HTMLConverter(book, fonts, path, options, logger)
|
conv = HTMLConverter(book, fonts, options, logger, path)
|
||||||
conv.process_links()
|
|
||||||
oname = options.output
|
oname = options.output
|
||||||
if not oname:
|
if not oname:
|
||||||
suffix = '.lrs' if options.lrs else '.lrf'
|
suffix = '.lrs' if options.lrs else '.lrf'
|
||||||
@ -1438,7 +1414,7 @@ def option_parser():
|
|||||||
return lrf_option_parser('''Usage: %prog [options] mybook.html\n\n'''
|
return lrf_option_parser('''Usage: %prog [options] mybook.html\n\n'''
|
||||||
'''%prog converts mybook.html to mybook.lrf''')
|
'''%prog converts mybook.html to mybook.lrf''')
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
try:
|
try:
|
||||||
parser = option_parser()
|
parser = option_parser()
|
||||||
options, args = parser.parse_args(args)
|
options, args = parser.parse_args(args)
|
||||||
@ -1453,7 +1429,8 @@ def main(args=sys.argv):
|
|||||||
warnings.defaultaction = 'error'
|
warnings.defaultaction = 'error'
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
print >> sys.stderr, err
|
print >> sys.stderr, err
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
process_file(src, options)
|
process_file(src, options)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
@ -484,6 +484,8 @@ class Book(Delegator):
|
|||||||
self.applySettings(settings, testValid=True)
|
self.applySettings(settings, testValid=True)
|
||||||
|
|
||||||
self.allow_new_page = True #: If False L{create_page} raises an exception
|
self.allow_new_page = True #: If False L{create_page} raises an exception
|
||||||
|
self.gc_count = 0
|
||||||
|
|
||||||
|
|
||||||
def create_text_style(self, **settings):
|
def create_text_style(self, **settings):
|
||||||
ans = TextStyle(**self.defaultTextStyle.attrs.copy())
|
ans = TextStyle(**self.defaultTextStyle.attrs.copy())
|
||||||
@ -1448,7 +1450,7 @@ class TextBlock(LrsObject, LrsContainer):
|
|||||||
self.blockStyle = blockStyle
|
self.blockStyle = blockStyle
|
||||||
|
|
||||||
# create a textStyle with our current text settings (for Span to find)
|
# create a textStyle with our current text settings (for Span to find)
|
||||||
self.currentTextStyle = textStyle.copy()
|
self.currentTextStyle = textStyle.copy() if self.textSettings else textStyle
|
||||||
self.currentTextStyle.attrs.update(self.textSettings)
|
self.currentTextStyle.attrs.update(self.textSettings)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user