Stylistic cleanups

This commit is contained in:
Kovid Goyal 2014-03-30 19:10:40 +05:30
parent c9d593ae2b
commit 2f3727956b
3 changed files with 44 additions and 52 deletions

View File

@ -22,8 +22,7 @@ from calibre.utils.zipfile import ZipFile
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
def fromstring(raw, parser=RECOVER_PARSER): def fromstring(raw, parser=RECOVER_PARSER):
res = etree.fromstring(raw, parser=parser) return etree.fromstring(raw, parser=parser)
return res
# Read metadata {{{ # Read metadata {{{
def read_doc_props(raw, mi): def read_doc_props(raw, mi):

View File

@ -10,8 +10,6 @@ import re
from calibre.ebooks.docx.names import XPath, get from calibre.ebooks.docx.names import XPath, get
import sys
class Field(object): class Field(object):
def __init__(self, start): def __init__(self, start):

View File

@ -6,21 +6,18 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import itertools
from collections import OrderedDict from collections import OrderedDict
from lxml import html
from lxml.html.builder import (
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, SUP, A, DT, DL, DD, H1)
from calibre.ebooks.docx.names import (
XPath, is_tag, XML, STYLES, NUMBERING, FONTS, get, generate_anchor,
ancestor, descendants, namespaces, FOOTNOTES, ENDNOTES, children, THEMES, SETTINGS)
from lxml.html.builder import A, SPAN
import lxml.etree import lxml.etree
from calibre.ebooks.docx.names import XPath, ancestor, namespaces
NBSP = '\xa0' NBSP = '\xa0'
class Location(object): class Location(object):
""" r"""
This class represents one location in the index. This class represents one location in the index.
We should provide a way to mark the main entries. LibreOffice We should provide a way to mark the main entries. LibreOffice
has a main attribute, which doesn't seem to map to docx, and at least has a main attribute, which doesn't seem to map to docx, and at least
@ -36,10 +33,10 @@ class Location(object):
class Entry(object): class Entry(object):
""" """
This class represents one index entry. This class represents one index entry.
We can also have a list of subentries for the primary/secondary We can also have a list of sub-entries for the primary/secondary
topic situation. topic situation.
Each entry has a list of locations we want to point to, but Each entry has a list of locations we want to point to, but
it could be empty if this is only here to organize subentries. it could be empty if this is only here to organize sub-entries.
""" """
def __init__(self, name, index): def __init__(self, name, index):
@ -47,7 +44,7 @@ class Entry(object):
self.locations = [] self.locations = []
self.name = name self.name = name
self.index = index self.index = index
def add_entry(self, entry, sub): def add_entry(self, entry, sub):
""" """
The entry has the form [xxx, field, bookmark, target] The entry has the form [xxx, field, bookmark, target]
@ -62,7 +59,7 @@ class Entry(object):
# As a first pass, we just put a placeholder in the target location # As a first pass, we just put a placeholder in the target location
# We want it to float right # We want it to float right
markid = amap[loc.bookmark] markid = amap[loc.bookmark]
if markid == None: if markid is None:
return return
span = A() span = A()
@ -130,13 +127,13 @@ class Section(object):
topics = entry[0].strip('"').split(':') topics = entry[0].strip('"').split(':')
targ = find_entry(topics[0], self.entries, self.index) targ = find_entry(topics[0], self.entries, self.index)
targ.add_entry(entry, topics[1:]) targ.add_entry(entry, topics[1:])
def to_html(self, key, body, amap): def to_html(self, key, body, amap):
""" """
Add one section of the index to the html Add one section of the index to the html
""" """
if len(key) > 0: if len(key) > 0:
body.append(add_name(key, self.index.sectionStyle)) body.append(add_name(key, self.index.section_style))
for ekey in sorted(self.entries.keys()): for ekey in sorted(self.entries.keys()):
self.entries[ekey].to_html(body, 0, amap) self.entries[ekey].to_html(body, 0, amap)
@ -145,7 +142,7 @@ class Index(object):
This class generates an alphabetical index from the index markers in a docx file. This class generates an alphabetical index from the index markers in a docx file.
Each field in the parse of the docx file contains an instructions list. Each field in the parse of the docx file contains an instructions list.
Instructions with name XE are index instructions. Instructions with name XE are index instructions.
The instruction also contains the entry specifier, of the form A[:B[:C]] for The instruction also contains the entry specifier, of the form A[:B[:C]] for
main entry, A, subentry B, and so on. main entry, A, subentry B, and so on.
@ -184,10 +181,10 @@ class Index(object):
sec.add_entry(unit) sec.add_entry(unit)
def get_entries(self): def get_entries(self):
""" r"""
We already have a list of fields which includes the index marks, We already have a list of fields which includes the index marks,
identified by an XE tag. identified by an XE tag.
In the base case, the field object includes an instruction list In the base case, the field object includes an instruction list
with one tuple like ('XE', '"entry"'), where entry is the text we with one tuple like ('XE', '"entry"'), where entry is the text we
want to put in the index. Note the double quotes around the entry. want to put in the index. Note the double quotes around the entry.
Sometimes the entry is broken up in the document, for example if Sometimes the entry is broken up in the document, for example if
@ -202,28 +199,27 @@ class Index(object):
""" """
fields = self.convert.fields.fields fields = self.convert.fields.fields
def get_entry(field):
elist = [field.instructions[0][1]]
for inst in field.instructions[1:]:
elist.append(inst[0])
elist.append(inst[1])
entry = ''.join(elist)
sep1 = entry.partition('"')
if sep1[2] == '':
return entry
sep2 = sep1[2].partition('"')
return sep2[0]
# Only want the index entries # Only want the index entries
fields = filter(lambda f: len(f.instructions) > 0 and f.instructions[0][0] == 'XE', fields) return [[get_entry(f), f] for f in fields
return map(lambda f: [self.get_entry(f), f], fields) if f.instructions and f.instructions[0][0] == 'XE']
def get_entry(self, field):
elist = [field.instructions[0][1]]
for inst in field.instructions[1:]:
elist.append(inst[0])
elist.append(inst[1])
entry = ''.join(elist)
sep1 = entry.partition('"')
if sep1[2] == '':
return entry
sep2 = sep1[2].partition('"')
return sep2[0]
def target_styles(self): def target_styles(self):
""" """
We want to get a list of styles which represent valid index targets. We want to get a list of styles which represent valid index targets.
That is, the text of a link in the index will be the title of the That is, the text of a link in the index will be the title of the
section of the document containing the indexed location. section of the document containing the indexed location.
We want the list of styles which can provide a valid title. We want the list of styles which can provide a valid title.
In practice, this maps to Heading1 through Heading3 in the original document. In practice, this maps to Heading1 through Heading3 in the original document.
@ -242,7 +238,7 @@ class Index(object):
""" """
snodes = XPath("./w:pPr/w:pStyle")(node) snodes = XPath("./w:pPr/w:pStyle")(node)
if len(snodes) == 0: if len(snodes) == 0:
return False; return False
sn = snodes[0] sn = snodes[0]
@ -264,14 +260,13 @@ class Index(object):
answer.append(c) answer.append(c)
return answer return answer
def textValue(self, node): def text_value(self, node):
tnodes = XPath("./w:r/w:t")(node) tnodes = XPath("./w:r/w:t")(node)
if len(tnodes) == 0: if len(tnodes) == 0:
return 'Link' return 'Link'
textl = map(lambda x: x.text, tnodes) return ''.join((x.text or '') for x in tnodes)
return ''.join(textl)
def findTarget(self, node): def find_target(self, node):
""" """
Given an index entry, find the text of the last heading section Given an index entry, find the text of the last heading section
preceding the entry. preceding the entry.
@ -279,7 +274,7 @@ class Index(object):
return the text. return the text.
Otherwise, go up the document level by level, starting with the Otherwise, go up the document level by level, starting with the
parent of the w:p element containing the entry. parent of the w:p element containing the entry.
At each level, get the list of heading w:p elements which are At each level, get the list of heading w:p elements which are
children of the top node. We also have the index in the top node children of the top node. We also have the index in the top node
of the child node containing the entry. of the child node containing the entry.
Find the largest index of a heading child which is < the entry Find the largest index of a heading child which is < the entry
@ -291,11 +286,11 @@ class Index(object):
""" """
pnode = ancestor(node, 'w:p') pnode = ancestor(node, 'w:p')
if self.is_heading(pnode): if self.is_heading(pnode):
return self.textValue(pnode) return self.text_value(pnode)
while True: while True:
parent = pnode.getparent() parent = pnode.getparent()
if parent == None: if parent is None:
return 'Link' return 'Link'
# Maintain document order in these lists # Maintain document order in these lists
@ -303,7 +298,7 @@ class Index(object):
hlist = self.get_headings(parent) hlist = self.get_headings(parent)
hlist = filter(lambda x: parent.index(x) < pindex, hlist) hlist = filter(lambda x: parent.index(x) < pindex, hlist)
if len(hlist) > 0: if len(hlist) > 0:
return self.textValue(hlist[-1]) return self.text_value(hlist[-1])
# Try again # Try again
pnode = parent pnode = parent
@ -341,7 +336,7 @@ class Index(object):
# We should make the targets configurable, and add chapter # We should make the targets configurable, and add chapter
# titles and maybe other things. # titles and maybe other things.
# What about numbering? # What about numbering?
targnode = self.findTarget(rnode) targnode = self.find_target(rnode)
entry.append(targnode) entry.append(targnode)
def gen_styles(self): def gen_styles(self):
@ -350,15 +345,15 @@ class Index(object):
We do title, section header, and three levels of entries. We do title, section header, and three levels of entries.
These are reasonable styles which only set a couple of key These are reasonable styles which only set a couple of key
values, but we could provide an interface to allow the user to set them. values, but we could provide an interface to allow the user to set them.
Is there any problem registering the styles this early in the Is there any problem registering the styles this early in the
conversion process? conversion process?
""" """
# The result is a string we can use as a class name. # The result is a string we can use as a class name.
css = OrderedDict([('font-size', '20pt'), ('page-break-before', 'always')]) css = OrderedDict([('font-size', '20pt'), ('page-break-before', 'always')])
self.titleStyle = self.convert.styles.register(css, 'block') self.title_style = self.convert.styles.register(css, 'block')
css = OrderedDict([('font-size', '16pt'), ('margin-top', '20pt'), ('margin-bottom', '10pt')]) css = OrderedDict([('font-size', '16pt'), ('margin-top', '20pt'), ('margin-bottom', '10pt')])
self.sectionStyle = self.convert.styles.register(css, 'block') self.section_style = self.convert.styles.register(css, 'block')
self.entry_styles = [] self.entry_styles = []
for i in range(3): for i in range(3):
@ -393,7 +388,7 @@ class Index(object):
This method writes it into the html. This method writes it into the html.
""" """
body = self.convert.body body = self.convert.body
body.append(add_name('Index', self.titleStyle)) body.append(add_name('Index', self.title_style))
# And write them to the html # And write them to the html
for key in sorted(self.sections.keys()): for key in sorted(self.sections.keys()):