Style improvements and debugging in the docx indexing code

This commit is contained in:
Peter Garst 2014-03-27 11:53:44 -07:00
parent db33444038
commit 353d6dee6c

View File

@ -19,7 +19,7 @@ import lxml.etree
NBSP = '\xa0' NBSP = '\xa0'
class Location: class Location(object):
""" """
This class represents one location in the index. This class represents one location in the index.
We should provide a way to mark the main entries. Libre office We should provide a way to mark the main entries. Libre office
@ -33,7 +33,7 @@ class Location:
self.bookmark = bookmark self.bookmark = bookmark
self.target = target self.target = target
class Entry: class Entry(object):
""" """
This class represents one index entry. This class represents one index entry.
We can also have a list of subentries for the primary/secondary We can also have a list of subentries for the primary/secondary
@ -48,17 +48,17 @@ class Entry:
self.name = name self.name = name
self.index = index self.index = index
def add_entry(self, entry, sub):
    """
    Record one index marker under this entry.

    The entry has the form [xxx, field, bookmark, target]; sub is the
    list of sub-topic names that still have to be descended through.
    """
    if sub:
        # Descend one level: find (or create) the child entry named by
        # the first remaining topic and hand it the rest of the path.
        child = find_entry(sub[0], self.subentries, self.index)
        child.add_entry(entry, sub[1:])
        return
    # No sub-topics remain, so the marker belongs to this entry itself.
    self.locations.append(Location(entry[2], entry[3]))
def makeLink(self, loc, amap): def make_link(self, loc, amap):
# As a first pass, we just put a placeholder in the target location # As a first pass, we just put a placeholder in the target location
# We want it to float right # We want it to float right
markid = amap[loc.bookmark] markid = amap[loc.bookmark]
@ -74,41 +74,41 @@ class Entry:
setattr(text.elem, text.attr, ''.join(text.buf)) setattr(text.elem, text.attr, ''.join(text.buf))
return span return span
def to_htmlunit(self, body, level, amap):
    """
    Append the material for one index entry to the document.

    There is a name, and 0 or more locations. The first location, if
    any, goes on the same line as the name; every further location is
    placed on a following line of its own.
    """
    style = self.index.entry_styles[level]
    heading = add_name(self.name, style)
    if self.locations:
        # First link shares the line with the entry name
        heading.append(self.make_link(self.locations[0], amap))
    body.append(heading)

    # Remaining links for the same entry each get their own paragraph.
    # A non-breaking space stands in for the name so that the link span
    # stays separate.
    for loc in self.locations[1:]:
        link = self.make_link(loc, amap)
        row = P()
        row.set('class', style)
        row.text = NBSP
        row.append(link)
        body.append(row)
def to_html(self, body, level, amap):
    """
    Write this entry, then all of its subentries in sorted key order,
    into body.

    The nesting level is capped at 2 before it is used, and each
    subentry is written one level deeper than the capped value.
    """
    depth = min(level, 2)
    self.to_htmlunit(body, depth, amap)
    for name in sorted(self.subentries):
        child = self.subentries[name]
        child.to_html(body, depth + 1, amap)
class Section: class Section(object):
""" """
This class represents one section of the index - usually, This class represents one section of the index - usually,
for example, the A's or the B's. for example, the A's or the B's.
@ -119,7 +119,7 @@ class Section:
self.index = index self.index = index
self.entries = {} self.entries = {}
def addEntry(self, entry): def add_entry(self, entry):
""" """
We have information from one index marker. We have information from one index marker.
The entry has form [name, field, bookmark, target]. The entry has form [name, field, bookmark, target].
@ -128,19 +128,19 @@ class Section:
location to it; otherwise create a new entry. location to it; otherwise create a new entry.
""" """
topics = entry[0].strip('"').split(':') topics = entry[0].strip('"').split(':')
targ = Index.findEntry(topics[0], self.entries, self.index) targ = find_entry(topics[0], self.entries, self.index)
targ.addEntry(entry, topics[1:]) targ.add_entry(entry, topics[1:])
def to_html(self, key, body, amap):
    """
    Add one section of the index to the html.

    A non-empty key is written first as the section heading; the
    section's entries then follow in sorted key order at level 0.
    """
    if key:
        heading = add_name(key, self.index.sectionStyle)
        body.append(heading)
    for name in sorted(self.entries):
        self.entries[name].to_html(body, 0, amap)
class Index: class Index(object):
""" """
This class generates an alphabetical index from the index markers in a docx file. This class generates an alphabetical index from the index markers in a docx file.
@ -166,24 +166,24 @@ class Index:
self.convert = convert self.convert = convert
self.sections = {} self.sections = {}
self.genStyles() self.gen_styles()
# Get a list of [name, field] entries, where name is the index # Get a list of [name, field] entries, where name is the index
# entry and field is the indexed location # entry and field is the indexed location
self.entries = self.getEntries() self.entries = self.get_entries()
# Find styles which provide the text for links. # Find styles which provide the text for links.
self.targetStyles() self.target_styles()
# Generate bookmarks in the document at the indexed locations # Generate bookmarks in the document at the indexed locations
self.bookmarks() self.bookmarks()
# Set up the entries in index sections # Set up the entries in index sections
for unit in self.entries: for unit in self.entries:
sec = self.findSection(unit[0]) sec = self.find_section(unit[0])
sec.addEntry(unit) sec.add_entry(unit)
def getEntries(self): def get_entries(self):
""" """
We already have a list of fields which includes the index marks, We already have a list of fields which includes the index marks,
identified by an XE tag. identified by an XE tag.
@ -204,9 +204,9 @@ class Index:
# Only want the index entries # Only want the index entries
fields = filter(lambda f: len(f.instructions) > 0 and f.instructions[0][0] == 'XE', fields) fields = filter(lambda f: len(f.instructions) > 0 and f.instructions[0][0] == 'XE', fields)
return map(lambda f: [self.getEntry(f), f], fields) return map(lambda f: [self.get_entry(f), f], fields)
def getEntry(self, field): def get_entry(self, field):
elist = [field.instructions[0][1]] elist = [field.instructions[0][1]]
for inst in field.instructions[1:]: for inst in field.instructions[1:]:
@ -220,7 +220,7 @@ class Index:
sep2 = sep1[2].partition('"') sep2 = sep1[2].partition('"')
return sep2[0] return sep2[0]
def targetStyles(self): def target_styles(self):
""" """
We want to get a list of styles which represent valid index targets. We want to get a list of styles which represent valid index targets.
That is, the text of a link in the index will be the title of the That is, the text of a link in the index will be the title of the
@ -234,9 +234,9 @@ class Index:
jumped in earlier and could map it to the original docx styles. jumped in earlier and could map it to the original docx styles.
""" """
smap = self.convert.styles.id_map smap = self.convert.styles.id_map
self.targstyles = [name for name, style in smap.iteritems() if style.name.startswith('Heading')] self.targstyles = [name for name, style in smap.iteritems() if style.name.lower().startswith('heading')]
def isHeading(self, node): def is_heading(self, node):
""" """
Return true if the input node is a valid index link target. Return true if the input node is a valid index link target.
""" """
@ -253,14 +253,14 @@ class Index:
style = sn.get(k[0]) style = sn.get(k[0])
return style in self.targstyles return style in self.targstyles
def get_headings(self, node):
    """
    Return the children of the input node which are headings - that
    is, valid targets for an index link - in the order the node
    reports them.
    """
    return [child for child in node.getchildren() if self.is_heading(child)]
@ -290,7 +290,7 @@ class Index:
original names. original names.
""" """
pnode = ancestor(node, 'w:p') pnode = ancestor(node, 'w:p')
if self.isHeading(pnode): if self.is_heading(pnode):
return self.textValue(pnode) return self.textValue(pnode)
while True: while True:
@ -300,7 +300,7 @@ class Index:
# Maintain document order in these lists # Maintain document order in these lists
pindex = parent.index(pnode) pindex = parent.index(pnode)
hlist = self.getHeadings(parent) hlist = self.get_headings(parent)
hlist = filter(lambda x: parent.index(x) < pindex, hlist) hlist = filter(lambda x: parent.index(x) < pindex, hlist)
if len(hlist) > 0: if len(hlist) > 0:
return self.textValue(hlist[-1]) return self.textValue(hlist[-1])
@ -344,7 +344,7 @@ class Index:
targnode = self.findTarget(rnode) targnode = self.findTarget(rnode)
entry.append(targnode) entry.append(targnode)
def genStyles(self): def gen_styles(self):
""" """
Generate css styles for the index elements. Generate css styles for the index elements.
We do title, section header, and three levels of entries. We do title, section header, and three levels of entries.
@ -360,13 +360,13 @@ class Index:
css = OrderedDict([('font-size', '16pt'), ('margin-top', '20pt'), ('margin-bottom', '10pt')]) css = OrderedDict([('font-size', '16pt'), ('margin-top', '20pt'), ('margin-bottom', '10pt')])
self.sectionStyle = self.convert.styles.register(css, 'block') self.sectionStyle = self.convert.styles.register(css, 'block')
self.entryStyles = [] self.entry_styles = []
for i in range(3): for i in range(3):
indent = str(i*20) + 'pt' indent = str(i*20) + 'pt'
css = OrderedDict([('margin-top', '0pt'), ('margin-bottom', '0pt'), ('margin-left', indent)]) css = OrderedDict([('margin-top', '0pt'), ('margin-bottom', '0pt'), ('margin-left', indent)])
self.entryStyles.append(self.convert.styles.register(css, 'block')) self.entry_styles.append(self.convert.styles.register(css, 'block'))
def findSection(self, tag): def find_section(self, tag):
""" """
Find the section for this index entry, creating it if required. Find the section for this index entry, creating it if required.
The tag has a form like A or A:B or etc. The tag has a form like A or A:B or etc.
@ -393,35 +393,33 @@ class Index:
This method writes it into the html. This method writes it into the html.
""" """
body = self.convert.body body = self.convert.body
body.append(Index.addName('Index', self.titleStyle)) body.append(add_name('Index', self.titleStyle))
# And write them to the html # And write them to the html
for key in sorted(self.sections.keys()): for key in sorted(self.sections.keys()):
self.sections[key].toHtml(key, body, self.convert.anchor_map) self.sections[key].to_html(key, body, self.convert.anchor_map)
def add_name(str, clname):
    """
    Build a paragraph of class clname holding a single span whose
    text is the given string, and return the paragraph.
    """
    # Put this into the convert document map?
    from calibre.ebooks.docx.to_html import Text

    dest = P()
    dest.set('class', clname)

    span = SPAN()
    # Go through a Text record so the span's text is set the same way
    # as in make_link: collect pieces in a buffer, then join them.
    text = Text(span, 'text', [])
    text.buf.append(str)
    setattr(text.elem, text.attr, ''.join(text.buf))

    dest.append(span)
    return dest
def find_entry(value, dict, index):
    """
    Find the Entry in the dictionary, or create a new one.

    Keys are the lower-cased names, so all capitalizations are grouped
    together as a single entry. A newly created Entry keeps the
    capitalization of the value that first produced it.
    """
    key = value.lower()
    try:
        return dict[key]
    except KeyError:
        pass
    created = Entry(value, index)
    dict[key] = created
    return created