mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Stylistic cleanups
This commit is contained in:
parent
c9d593ae2b
commit
2f3727956b
@ -22,8 +22,7 @@ from calibre.utils.zipfile import ZipFile
|
|||||||
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
|
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
|
||||||
|
|
||||||
def fromstring(raw, parser=RECOVER_PARSER):
|
def fromstring(raw, parser=RECOVER_PARSER):
|
||||||
res = etree.fromstring(raw, parser=parser)
|
return etree.fromstring(raw, parser=parser)
|
||||||
return res
|
|
||||||
|
|
||||||
# Read metadata {{{
|
# Read metadata {{{
|
||||||
def read_doc_props(raw, mi):
|
def read_doc_props(raw, mi):
|
||||||
|
@ -10,8 +10,6 @@ import re
|
|||||||
|
|
||||||
from calibre.ebooks.docx.names import XPath, get
|
from calibre.ebooks.docx.names import XPath, get
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
class Field(object):
|
class Field(object):
|
||||||
|
|
||||||
def __init__(self, start):
|
def __init__(self, start):
|
||||||
|
@ -6,21 +6,18 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import itertools
|
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from lxml import html
|
|
||||||
from lxml.html.builder import (
|
|
||||||
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, SUP, A, DT, DL, DD, H1)
|
|
||||||
from calibre.ebooks.docx.names import (
|
|
||||||
XPath, is_tag, XML, STYLES, NUMBERING, FONTS, get, generate_anchor,
|
|
||||||
ancestor, descendants, namespaces, FOOTNOTES, ENDNOTES, children, THEMES, SETTINGS)
|
|
||||||
|
|
||||||
|
from lxml.html.builder import A, SPAN
|
||||||
import lxml.etree
|
import lxml.etree
|
||||||
|
|
||||||
|
from calibre.ebooks.docx.names import XPath, ancestor, namespaces
|
||||||
|
|
||||||
|
|
||||||
NBSP = '\xa0'
|
NBSP = '\xa0'
|
||||||
|
|
||||||
class Location(object):
|
class Location(object):
|
||||||
"""
|
r"""
|
||||||
This class represents one location in the index.
|
This class represents one location in the index.
|
||||||
We should provide a way to mark the main entries. Libre office
|
We should provide a way to mark the main entries. Libre office
|
||||||
has a main attribute, which doesn't seem to map to docx, and at least
|
has a main attribute, which doesn't seem to map to docx, and at least
|
||||||
@ -36,10 +33,10 @@ class Location(object):
|
|||||||
class Entry(object):
|
class Entry(object):
|
||||||
"""
|
"""
|
||||||
This class represents one index entry.
|
This class represents one index entry.
|
||||||
We can also have a list of subentries for the primary/secondary
|
We can also have a list of sub-entries for the primary/secondary
|
||||||
topic situation.
|
topic situation.
|
||||||
Each entry has a list of locations we want to point to, but
|
Each entry has a list of locations we want to point to, but
|
||||||
it could be empty if this is only here to organize subentries.
|
it could be empty if this is only here to organize sub-entries.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, name, index):
|
def __init__(self, name, index):
|
||||||
@ -47,7 +44,7 @@ class Entry(object):
|
|||||||
self.locations = []
|
self.locations = []
|
||||||
self.name = name
|
self.name = name
|
||||||
self.index = index
|
self.index = index
|
||||||
|
|
||||||
def add_entry(self, entry, sub):
|
def add_entry(self, entry, sub):
|
||||||
"""
|
"""
|
||||||
The entry has the form [xxx, field, bookmark, target]
|
The entry has the form [xxx, field, bookmark, target]
|
||||||
@ -62,7 +59,7 @@ class Entry(object):
|
|||||||
# As a first pass, we just put a placeholder in the target location
|
# As a first pass, we just put a placeholder in the target location
|
||||||
# We want it to float right
|
# We want it to float right
|
||||||
markid = amap[loc.bookmark]
|
markid = amap[loc.bookmark]
|
||||||
if markid == None:
|
if markid is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
span = A()
|
span = A()
|
||||||
@ -130,13 +127,13 @@ class Section(object):
|
|||||||
topics = entry[0].strip('"').split(':')
|
topics = entry[0].strip('"').split(':')
|
||||||
targ = find_entry(topics[0], self.entries, self.index)
|
targ = find_entry(topics[0], self.entries, self.index)
|
||||||
targ.add_entry(entry, topics[1:])
|
targ.add_entry(entry, topics[1:])
|
||||||
|
|
||||||
def to_html(self, key, body, amap):
|
def to_html(self, key, body, amap):
|
||||||
"""
|
"""
|
||||||
Add one section of the index to the html
|
Add one section of the index to the html
|
||||||
"""
|
"""
|
||||||
if len(key) > 0:
|
if len(key) > 0:
|
||||||
body.append(add_name(key, self.index.sectionStyle))
|
body.append(add_name(key, self.index.section_style))
|
||||||
for ekey in sorted(self.entries.keys()):
|
for ekey in sorted(self.entries.keys()):
|
||||||
self.entries[ekey].to_html(body, 0, amap)
|
self.entries[ekey].to_html(body, 0, amap)
|
||||||
|
|
||||||
@ -145,7 +142,7 @@ class Index(object):
|
|||||||
This class generates an alphabetical index from the index markers in a docx file.
|
This class generates an alphabetical index from the index markers in a docx file.
|
||||||
|
|
||||||
Each field in the parse of the docx file contains an instructions list.
|
Each field in the parse of the docx file contains an instructions list.
|
||||||
Instructions with name XE are index instructions.
|
Instructions with name XE are index instructions.
|
||||||
The instruction also contains the entry specifier, of the form A[:B[:C]] for
|
The instruction also contains the entry specifier, of the form A[:B[:C]] for
|
||||||
main entry, A, subentry B, and so on.
|
main entry, A, subentry B, and so on.
|
||||||
|
|
||||||
@ -184,10 +181,10 @@ class Index(object):
|
|||||||
sec.add_entry(unit)
|
sec.add_entry(unit)
|
||||||
|
|
||||||
def get_entries(self):
|
def get_entries(self):
|
||||||
"""
|
r"""
|
||||||
We already have a list of fields which includes the index marks,
|
We already have a list of fields which includes the index marks,
|
||||||
identified by an XE tag.
|
identified by an XE tag.
|
||||||
In the base case, the field object includes an instruction list
|
In the base case, the field object includes an instruction list
|
||||||
with one tuple like ('XE', '"entry"'), where entry is the text we
|
with one tuple like ('XE', '"entry"'), where entry is the text we
|
||||||
want to put in the index. Note the double quotes around the entry.
|
want to put in the index. Note the double quotes around the entry.
|
||||||
Sometimes the entry is broken up in the document, for example if
|
Sometimes the entry is broken up in the document, for example if
|
||||||
@ -202,28 +199,27 @@ class Index(object):
|
|||||||
"""
|
"""
|
||||||
fields = self.convert.fields.fields
|
fields = self.convert.fields.fields
|
||||||
|
|
||||||
|
def get_entry(field):
|
||||||
|
elist = [field.instructions[0][1]]
|
||||||
|
for inst in field.instructions[1:]:
|
||||||
|
elist.append(inst[0])
|
||||||
|
elist.append(inst[1])
|
||||||
|
|
||||||
|
entry = ''.join(elist)
|
||||||
|
sep1 = entry.partition('"')
|
||||||
|
if sep1[2] == '':
|
||||||
|
return entry
|
||||||
|
sep2 = sep1[2].partition('"')
|
||||||
|
return sep2[0]
|
||||||
|
|
||||||
# Only want the index entries
|
# Only want the index entries
|
||||||
fields = filter(lambda f: len(f.instructions) > 0 and f.instructions[0][0] == 'XE', fields)
|
return [[get_entry(f), f] for f in fields
|
||||||
return map(lambda f: [self.get_entry(f), f], fields)
|
if f.instructions and f.instructions[0][0] == 'XE']
|
||||||
|
|
||||||
def get_entry(self, field):
|
|
||||||
|
|
||||||
elist = [field.instructions[0][1]]
|
|
||||||
for inst in field.instructions[1:]:
|
|
||||||
elist.append(inst[0])
|
|
||||||
elist.append(inst[1])
|
|
||||||
|
|
||||||
entry = ''.join(elist)
|
|
||||||
sep1 = entry.partition('"')
|
|
||||||
if sep1[2] == '':
|
|
||||||
return entry
|
|
||||||
sep2 = sep1[2].partition('"')
|
|
||||||
return sep2[0]
|
|
||||||
|
|
||||||
def target_styles(self):
|
def target_styles(self):
|
||||||
"""
|
"""
|
||||||
We want to get a list of styles which represent valid index targets.
|
We want to get a list of styles which represent valid index targets.
|
||||||
That is, the text of a link in the index will be the title of the
|
That is, the text of a link in the index will be the title of the
|
||||||
section of the document containing the indexed location.
|
section of the document containing the indexed location.
|
||||||
We want the list of styles which can provide a valid title.
|
We want the list of styles which can provide a valid title.
|
||||||
In practice, this maps to Heading1 through Heading3 in the original document.
|
In practice, this maps to Heading1 through Heading3 in the original document.
|
||||||
@ -242,7 +238,7 @@ class Index(object):
|
|||||||
"""
|
"""
|
||||||
snodes = XPath("./w:pPr/w:pStyle")(node)
|
snodes = XPath("./w:pPr/w:pStyle")(node)
|
||||||
if len(snodes) == 0:
|
if len(snodes) == 0:
|
||||||
return False;
|
return False
|
||||||
|
|
||||||
sn = snodes[0]
|
sn = snodes[0]
|
||||||
|
|
||||||
@ -264,14 +260,13 @@ class Index(object):
|
|||||||
answer.append(c)
|
answer.append(c)
|
||||||
return answer
|
return answer
|
||||||
|
|
||||||
def textValue(self, node):
|
def text_value(self, node):
|
||||||
tnodes = XPath("./w:r/w:t")(node)
|
tnodes = XPath("./w:r/w:t")(node)
|
||||||
if len(tnodes) == 0:
|
if len(tnodes) == 0:
|
||||||
return 'Link'
|
return 'Link'
|
||||||
textl = map(lambda x: x.text, tnodes)
|
return ''.join((x.text or '') for x in tnodes)
|
||||||
return ''.join(textl)
|
|
||||||
|
|
||||||
def findTarget(self, node):
|
def find_target(self, node):
|
||||||
"""
|
"""
|
||||||
Given an index entry, find the text of the last heading section
|
Given an index entry, find the text of the last heading section
|
||||||
preceding the entry.
|
preceding the entry.
|
||||||
@ -279,7 +274,7 @@ class Index(object):
|
|||||||
return the text.
|
return the text.
|
||||||
Otherwise, go up the document level by level, staring with the
|
Otherwise, go up the document level by level, staring with the
|
||||||
parent of the w:p element containing the entry.
|
parent of the w:p element containing the entry.
|
||||||
At each level, get the list of heading w:p elements which are
|
At each level, get the list of heading w:p elements which are
|
||||||
children of the top node. We also have the index in the top node
|
children of the top node. We also have the index in the top node
|
||||||
of the child node containing the entry.
|
of the child node containing the entry.
|
||||||
Find the largest index of a heading child which is < the entry
|
Find the largest index of a heading child which is < the entry
|
||||||
@ -291,11 +286,11 @@ class Index(object):
|
|||||||
"""
|
"""
|
||||||
pnode = ancestor(node, 'w:p')
|
pnode = ancestor(node, 'w:p')
|
||||||
if self.is_heading(pnode):
|
if self.is_heading(pnode):
|
||||||
return self.textValue(pnode)
|
return self.text_value(pnode)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
parent = pnode.getparent()
|
parent = pnode.getparent()
|
||||||
if parent == None:
|
if parent is None:
|
||||||
return 'Link'
|
return 'Link'
|
||||||
|
|
||||||
# Maintain document order in these lists
|
# Maintain document order in these lists
|
||||||
@ -303,7 +298,7 @@ class Index(object):
|
|||||||
hlist = self.get_headings(parent)
|
hlist = self.get_headings(parent)
|
||||||
hlist = filter(lambda x: parent.index(x) < pindex, hlist)
|
hlist = filter(lambda x: parent.index(x) < pindex, hlist)
|
||||||
if len(hlist) > 0:
|
if len(hlist) > 0:
|
||||||
return self.textValue(hlist[-1])
|
return self.text_value(hlist[-1])
|
||||||
|
|
||||||
# Try again
|
# Try again
|
||||||
pnode = parent
|
pnode = parent
|
||||||
@ -341,7 +336,7 @@ class Index(object):
|
|||||||
# We should make the targets configurable, and add chapter
|
# We should make the targets configurable, and add chapter
|
||||||
# titles and maybe other things.
|
# titles and maybe other things.
|
||||||
# What about numbering?
|
# What about numbering?
|
||||||
targnode = self.findTarget(rnode)
|
targnode = self.find_target(rnode)
|
||||||
entry.append(targnode)
|
entry.append(targnode)
|
||||||
|
|
||||||
def gen_styles(self):
|
def gen_styles(self):
|
||||||
@ -350,15 +345,15 @@ class Index(object):
|
|||||||
We do title, section header, and three levels of entries.
|
We do title, section header, and three levels of entries.
|
||||||
These are reasonable styles which only set a couple of key
|
These are reasonable styles which only set a couple of key
|
||||||
values, but we could provide an interface to allow the user to set them.
|
values, but we could provide an interface to allow the user to set them.
|
||||||
Is there any problem registering the styles this early in the
|
Is there any problem registering the styles this early in the
|
||||||
conversion process?
|
conversion process?
|
||||||
"""
|
"""
|
||||||
# The result is a string we can use as a class name.
|
# The result is a string we can use as a class name.
|
||||||
css = OrderedDict([('font-size', '20pt'), ('page-break-before', 'always')])
|
css = OrderedDict([('font-size', '20pt'), ('page-break-before', 'always')])
|
||||||
self.titleStyle = self.convert.styles.register(css, 'block')
|
self.title_style = self.convert.styles.register(css, 'block')
|
||||||
|
|
||||||
css = OrderedDict([('font-size', '16pt'), ('margin-top', '20pt'), ('margin-bottom', '10pt')])
|
css = OrderedDict([('font-size', '16pt'), ('margin-top', '20pt'), ('margin-bottom', '10pt')])
|
||||||
self.sectionStyle = self.convert.styles.register(css, 'block')
|
self.section_style = self.convert.styles.register(css, 'block')
|
||||||
|
|
||||||
self.entry_styles = []
|
self.entry_styles = []
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
@ -393,7 +388,7 @@ class Index(object):
|
|||||||
This method writes it into the html.
|
This method writes it into the html.
|
||||||
"""
|
"""
|
||||||
body = self.convert.body
|
body = self.convert.body
|
||||||
body.append(add_name('Index', self.titleStyle))
|
body.append(add_name('Index', self.title_style))
|
||||||
|
|
||||||
# And write them to the html
|
# And write them to the html
|
||||||
for key in sorted(self.sections.keys()):
|
for key in sorted(self.sections.keys()):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user