diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py index 950669814a..c0e117fc0a 100644 --- a/src/libprs500/ebooks/lrf/html/convert_from.py +++ b/src/libprs500/ebooks/lrf/html/convert_from.py @@ -138,9 +138,9 @@ class HTMLConverter(object): (re.compile('<]*?id=BookAuthor[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?', re.IGNORECASE), lambda match : '

%s

'%(match.group(2) if match.group(2) else 'center', match.group(3))), (re.compile('<]*?id=title[^><]*?>(.*?)', re.IGNORECASE|re.DOTALL), - lambda match : '

%s

'%(match.group(1),)), + lambda match : '

%s

'%(match.group(1),)), (re.compile('<]*?id=subtitle[^><]*?>(.*?)', re.IGNORECASE|re.DOTALL), - lambda match : '

%s

'%(match.group(1),)), + lambda match : '

%s

'%(match.group(1),)), # Blank lines (re.compile('<]*?>( ){4}', re.IGNORECASE), lambda match : '

'), @@ -206,6 +206,8 @@ class HTMLConverter(object): self.targets = {} #: and id elements self.links = deque() #: elements self.processed_files = [] + self.extra_toc_entries = [] #: TOC entries gleaned from semantic information + self.id_counter = 0 self.unused_target_blocks = [] #: Used to remove extra TextBlocks self.link_level = 0 #: Current link level self.memory = [] #: Used to ensure that duplicate CSS unhandled erros are not reported @@ -265,6 +267,10 @@ class HTMLConverter(object): self.links = self.process_links() self.link_level += 1 paths = [link['path'] for link in self.links] + + for text, tb in self.extra_toc_entries: + ascii_text = text.encode('ascii', 'ignore') + self.book.addTocEntry(ascii_text, tb) def is_baen(self, soup): return bool(soup.find('meta', attrs={'name':'Publisher', @@ -1441,12 +1447,19 @@ class HTMLConverter(object): elif tagname in ['p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']: new_block = self.process_block(tag, tag_css) - if self.anchor_ids and tag.has_key('id'): + if (self.anchor_ids and tag.has_key('id')) or \ + (self.book_designer and tag.has_key('class') and tag['class']=='title'): + if not tag.has_key('id'): + tag['id'] = 'libprs500_id_'+str(self.id_counter) + self.id_counter += 1 + tkey = self.target_prefix+tag['id'] if not new_block: self.end_current_block() self.current_block.must_append = True self.targets[tkey] = self.current_block + if (self.book_designer and tag.has_key('class') and tag['class']=='title'): + self.extra_toc_entries.append((self.get_text(tag, 100), self.current_block)) src = self.get_text(tag, limit=1000)