mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
PML Input refactoring.
This commit is contained in:
parent
3289574153
commit
27935c4a71
@ -50,7 +50,7 @@ class PML_HTMLizer(object):
|
|||||||
]
|
]
|
||||||
|
|
||||||
STATES_TAGS = {
|
STATES_TAGS = {
|
||||||
'h1': ('<h1 style="page-break-after: always;">', '</h1>'),
|
'h1': ('<h1 style="page-break-before: always;">', '</h1>'),
|
||||||
'h2': ('<h2>', '</h2>'),
|
'h2': ('<h2>', '</h2>'),
|
||||||
'h3': ('<h3>', '</h3>'),
|
'h3': ('<h3>', '</h3>'),
|
||||||
'h4': ('<h4>', '</h4>'),
|
'h4': ('<h4>', '</h4>'),
|
||||||
@ -108,6 +108,8 @@ class PML_HTMLizer(object):
|
|||||||
'h4',
|
'h4',
|
||||||
'h5',
|
'h5',
|
||||||
'h6',
|
'h6',
|
||||||
|
'sb',
|
||||||
|
'sp',
|
||||||
]
|
]
|
||||||
|
|
||||||
DIV_STATES = [
|
DIV_STATES = [
|
||||||
@ -135,6 +137,13 @@ class PML_HTMLizer(object):
|
|||||||
def prepare_pml(self, pml):
|
def prepare_pml(self, pml):
|
||||||
# Remove comments
|
# Remove comments
|
||||||
pml = re.sub(r'(?mus)\\v(?P<text>.*?)\\v', '', pml)
|
pml = re.sub(r'(?mus)\\v(?P<text>.*?)\\v', '', pml)
|
||||||
|
|
||||||
|
# Remove extra white spaces.
|
||||||
|
pml = re.sub(r'(?mus)[ ]{2,}', ' ', pml)
|
||||||
|
pml = re.sub(r'(?mus)^[ ]*(?=.)', '', pml)
|
||||||
|
pml = re.sub(r'(?mus)(?<=.)[ ]*$', '', pml)
|
||||||
|
pml = re.sub(r'(?mus)^[ ]*$', '', pml)
|
||||||
|
|
||||||
# Footnotes and Sidebars
|
# Footnotes and Sidebars
|
||||||
pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="fns-%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
|
pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="fns-%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
|
||||||
pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="fns-%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
|
pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="fns-%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
|
||||||
@ -150,14 +159,6 @@ class PML_HTMLizer(object):
|
|||||||
|
|
||||||
return pml
|
return pml
|
||||||
|
|
||||||
def prepare_line(self, line):
|
|
||||||
line = re.sub(r'[ ]{2,}', ' ', line)
|
|
||||||
line = re.sub(r'^[ ]*(?=.)', '', line)
|
|
||||||
line = re.sub(r'(?<=.)[ ]*$', '', line)
|
|
||||||
line = re.sub(r'^[ ]*$', '', line)
|
|
||||||
|
|
||||||
return line
|
|
||||||
|
|
||||||
def cleanup_html(self, html):
|
def cleanup_html(self, html):
|
||||||
old = html
|
old = html
|
||||||
html = self.cleanup_html_remove_redundant(html)
|
html = self.cleanup_html_remove_redundant(html)
|
||||||
@ -217,7 +218,9 @@ class PML_HTMLizer(object):
|
|||||||
text = u''
|
text = u''
|
||||||
ds = []
|
ds = []
|
||||||
|
|
||||||
code = self.CODE_STATES[code]
|
code = self.CODE_STATES.get(code, None)
|
||||||
|
if not code:
|
||||||
|
return text
|
||||||
|
|
||||||
if code in self.DIV_STATES:
|
if code in self.DIV_STATES:
|
||||||
ds = self.DIV_STATES[:]
|
ds = self.DIV_STATES[:]
|
||||||
@ -278,7 +281,9 @@ class PML_HTMLizer(object):
|
|||||||
def process_code_block(self, code, stream, pre=''):
|
def process_code_block(self, code, stream, pre=''):
|
||||||
text = u''
|
text = u''
|
||||||
|
|
||||||
code = self.CODE_STATES[code]
|
code = self.CODE_STATES.get(code, None)
|
||||||
|
if not code:
|
||||||
|
return text
|
||||||
|
|
||||||
# Close all spans
|
# Close all spans
|
||||||
for c in self.SPAN_STATES:
|
for c in self.SPAN_STATES:
|
||||||
@ -312,27 +317,12 @@ class PML_HTMLizer(object):
|
|||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def process_code_simple(self, code):
|
|
||||||
if code not in self.CODE_STATES.keys():
|
|
||||||
return u''
|
|
||||||
|
|
||||||
text = u''
|
|
||||||
|
|
||||||
if self.state[self.CODE_STATES[code]][0]:
|
|
||||||
text = self.STATES_TAGS[self.CODE_STATES[code]][1]
|
|
||||||
else:
|
|
||||||
text = self.STATES_TAGS[self.CODE_STATES[code]][0]
|
|
||||||
|
|
||||||
self.state[self.CODE_STATES[code]][0] = not self.state[self.CODE_STATES[code]][0]
|
|
||||||
|
|
||||||
return text
|
|
||||||
|
|
||||||
def code_value(self, stream):
|
def code_value(self, stream):
|
||||||
value = u''
|
value = u''
|
||||||
# state 0 is before =
|
# state 0 is before =
|
||||||
# state 1 is before the first "
|
# state 1 is before the first "
|
||||||
# state 2 is before the second "
|
# state 2 is before the second "
|
||||||
|
# state 3 is after the second "
|
||||||
state = 0
|
state = 0
|
||||||
loc = stream.tell()
|
loc = stream.tell()
|
||||||
|
|
||||||
@ -341,6 +331,13 @@ class PML_HTMLizer(object):
|
|||||||
if state == 0:
|
if state == 0:
|
||||||
if c == '=':
|
if c == '=':
|
||||||
state = 1
|
state = 1
|
||||||
|
elif c != ' ':
|
||||||
|
# A code that requires an argument should have = after the
|
||||||
|
# code but sometimes has spaces. If it has anything other
|
||||||
|
# than a space or = after the code then we can assume the
|
||||||
|
# markup is invalid. We will stop looking for the value
|
||||||
|
# and continue to hopefully not lose any data.
|
||||||
|
break;
|
||||||
elif state == 1:
|
elif state == 1:
|
||||||
if c == '"':
|
if c == '"':
|
||||||
state = 2
|
state = 2
|
||||||
@ -353,6 +350,8 @@ class PML_HTMLizer(object):
|
|||||||
c = stream.read(1)
|
c = stream.read(1)
|
||||||
|
|
||||||
if state != 3:
|
if state != 3:
|
||||||
|
# Unable to complete the sequence to retrieve the value. Reset
|
||||||
|
# the stream to the location it started.
|
||||||
stream.seek(loc)
|
stream.seek(loc)
|
||||||
value = u''
|
value = u''
|
||||||
|
|
||||||
@ -370,13 +369,12 @@ class PML_HTMLizer(object):
|
|||||||
self.state[s] = [False, ''];
|
self.state[s] = [False, ''];
|
||||||
|
|
||||||
for line in pml.splitlines():
|
for line in pml.splitlines():
|
||||||
parsed = []
|
|
||||||
empty = True
|
|
||||||
|
|
||||||
line = self.prepare_line(line)
|
|
||||||
if not line:
|
if not line:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
parsed = []
|
||||||
|
empty = True
|
||||||
|
|
||||||
# Must use StringIO, cStringIO does not support unicode
|
# Must use StringIO, cStringIO does not support unicode
|
||||||
line = StringIO.StringIO(line)
|
line = StringIO.StringIO(line)
|
||||||
parsed.append(self.start_line())
|
parsed.append(self.start_line())
|
||||||
@ -389,15 +387,15 @@ class PML_HTMLizer(object):
|
|||||||
c = line.read(1)
|
c = line.read(1)
|
||||||
|
|
||||||
if c == 'x':
|
if c == 'x':
|
||||||
text = self.process_code_simple(c)
|
text = self.process_code_block(c, line)
|
||||||
elif c in 'XS':
|
elif c in 'XS':
|
||||||
l = line.read(1)
|
l = line.read(1)
|
||||||
if '%s%s' % (c, l) == 'Sd':
|
if '%s%s' % (c, l) == 'SB':
|
||||||
text = self.process_code_block('Sd', line, 'fns')
|
|
||||||
elif '%s%s' % (c, l) == 'SB':
|
|
||||||
text = self.process_code('SB', line)
|
text = self.process_code('SB', line)
|
||||||
|
elif '%s%s' % (c, l) == 'Sd':
|
||||||
|
text = self.process_code_block('Sd', line, 'fns')
|
||||||
else:
|
else:
|
||||||
text = self.process_code_simple('%s%s' % (c, l))
|
text = self.process_code_block('%s%s' % (c, l), line)
|
||||||
elif c == 'q':
|
elif c == 'q':
|
||||||
text = self.process_code_block(c, line)
|
text = self.process_code_block(c, line)
|
||||||
elif c in 'crtTiIuobBlk':
|
elif c in 'crtTiIuobBlk':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user