diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py
index 950669814a..c0e117fc0a 100644
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@@ -138,9 +138,9 @@ class HTMLConverter(object):
(re.compile('
<]*?id=BookAuthor[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?
', re.IGNORECASE),
lambda match : '%s
'%(match.group(2) if match.group(2) else 'center', match.group(3))),
(re.compile('<]*?id=title[^><]*?>(.*?)', re.IGNORECASE|re.DOTALL),
- lambda match : '%s
'%(match.group(1),)),
+ lambda match : '%s
'%(match.group(1),)),
(re.compile('<]*?id=subtitle[^><]*?>(.*?)', re.IGNORECASE|re.DOTALL),
- lambda match : '%s
'%(match.group(1),)),
+ lambda match : '%s
'%(match.group(1),)),
# Blank lines
(re.compile('<]*?>( ){4}
', re.IGNORECASE),
lambda match : ''),
@@ -206,6 +206,8 @@ class HTMLConverter(object):
self.targets = {} #: and id elements
self.links = deque() #: elements
self.processed_files = []
+ self.extra_toc_entries = [] #: TOC entries gleaned from semantic information
+ self.id_counter = 0
self.unused_target_blocks = [] #: Used to remove extra TextBlocks
self.link_level = 0 #: Current link level
self.memory = [] #: Used to ensure that duplicate CSS unhandled erros are not reported
@@ -265,6 +267,10 @@ class HTMLConverter(object):
self.links = self.process_links()
self.link_level += 1
paths = [link['path'] for link in self.links]
+
+ for text, tb in self.extra_toc_entries:
+ ascii_text = text.encode('ascii', 'ignore')
+ self.book.addTocEntry(ascii_text, tb)
def is_baen(self, soup):
return bool(soup.find('meta', attrs={'name':'Publisher',
@@ -1441,12 +1447,19 @@ class HTMLConverter(object):
elif tagname in ['p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
new_block = self.process_block(tag, tag_css)
- if self.anchor_ids and tag.has_key('id'):
+ if (self.anchor_ids and tag.has_key('id')) or \
+ (self.book_designer and tag.has_key('class') and tag['class']=='title'):
+ if not tag.has_key('id'):
+ tag['id'] = 'libprs500_id_'+str(self.id_counter)
+ self.id_counter += 1
+
tkey = self.target_prefix+tag['id']
if not new_block:
self.end_current_block()
self.current_block.must_append = True
self.targets[tkey] = self.current_block
+ if (self.book_designer and tag.has_key('class') and tag['class']=='title'):
+ self.extra_toc_entries.append((self.get_text(tag, 100), self.current_block))
src = self.get_text(tag, limit=1000)