mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More changes.
This commit is contained in:
parent
c384188057
commit
842ba75557
@ -12,7 +12,7 @@ A Humane Web Text Generator
|
|||||||
#__date__ = '2009/12/04'
|
#__date__ = '2009/12/04'
|
||||||
|
|
||||||
__copyright__ = """
|
__copyright__ = """
|
||||||
Copyright (c) 2011, Leigh Parry <leighparry@blueyonder.co.uk>
|
Copyright (c) 2011, Leigh Parry
|
||||||
Copyright (c) 2011, John Schember <john@nachtimwald.com>
|
Copyright (c) 2011, John Schember <john@nachtimwald.com>
|
||||||
Copyright (c) 2009, Jason Samsa, http://jsamsa.com/
|
Copyright (c) 2009, Jason Samsa, http://jsamsa.com/
|
||||||
Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
|
Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
|
||||||
@ -219,14 +219,13 @@ class Textile(object):
|
|||||||
]
|
]
|
||||||
glyph_defaults = [
|
glyph_defaults = [
|
||||||
(re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2×\3'), # dimension sign
|
(re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2×\3'), # dimension sign
|
||||||
(re.compile(r'(\d+)\'', re.I), r'\1′'), # prime
|
(re.compile(r'(\d+)\'(\s)', re.I), r'\1′\2'), # prime
|
||||||
(re.compile(r'(\d+)\"', re.I), r'\1″'), # prime-double
|
(re.compile(r'(\d+)\"(\s)', re.I), r'\1″\2'), # prime-double
|
||||||
(re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
|
(re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
|
||||||
(re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
|
(re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
|
||||||
(re.compile(r'\b(\s{0,1})?\.{3}'), r'\1…'), # ellipsis
|
(re.compile(r'\b(\s{0,1})?\.{3}'), r'\1…'), # ellipsis
|
||||||
(re.compile(r'^[\*_-]{3,}$', re.M), r'<hr />'), # <hr> scene-break
|
(re.compile(r'^[\*_-]{3,}$', re.M), r'<hr />'), # <hr> scene-break
|
||||||
# (re.compile(r'\b--\b'), r'—'), # em dash
|
(re.compile(r'(^|[^-])--([^-]|$)'), r'\1—\2'), # em dash
|
||||||
(re.compile(r'([^-])--([^-])'), r'\1—\2'), # em dash
|
|
||||||
(re.compile(r'\s-(?:\s|$)'), r' – '), # en dash
|
(re.compile(r'\s-(?:\s|$)'), r' – '), # en dash
|
||||||
(re.compile(r'\b( ?)[([]TM[])]', re.I), r'\1™'), # trademark
|
(re.compile(r'\b( ?)[([]TM[])]', re.I), r'\1™'), # trademark
|
||||||
(re.compile(r'\b( ?)[([]R[])]', re.I), r'\1®'), # registered
|
(re.compile(r'\b( ?)[([]R[])]', re.I), r'\1®'), # registered
|
||||||
@ -706,6 +705,21 @@ class Textile(object):
|
|||||||
result.append(line)
|
result.append(line)
|
||||||
return ''.join(result)
|
return ''.join(result)
|
||||||
|
|
||||||
|
def glyphs_only(self, text):
    """Apply the macro substitutions to the parts of *text* outside tags.

    *text* is split on ``<...>`` sequences.  Pieces that look like a tag
    are passed through untouched; every other piece that contains a
    ``{...}`` sequence has each ``(pattern, replacement)`` pair from
    ``self.macro_defaults`` applied to it in order.

    :param text: the markup to process.
    :return: the processed text, re-joined in the original order.
    """
    # fix: hackish -- a double quote at the very end of the text gets a
    # trailing space appended.  NOTE(review): presumably so that later
    # quote handling sees a delimiter after it; confirm against callers.
    text = re.sub(r'"\Z', '" ', text)

    result = []
    # The capturing group makes split() keep the tags in the output list,
    # so joining the pieces reproduces the whole document.
    for line in re.compile(r'(<.*?>)', re.U).split(text):
        # Only non-tag pieces that actually contain a {macro} are rewritten.
        if not re.search(r'<.*>', line) and re.search(r'{.+?}', line):
            for s, r in self.macro_defaults:
                line = s.sub(r, line)
        result.append(line)
    return ''.join(result)
|
||||||
|
|
||||||
def vAlign(self, input):
|
def vAlign(self, input):
|
||||||
d = {'^':'top', '-':'middle', '~':'bottom'}
|
d = {'^':'top', '-':'middle', '~':'bottom'}
|
||||||
return d.get(input, '')
|
return d.get(input, '')
|
||||||
@ -792,7 +806,6 @@ class Textile(object):
|
|||||||
text = self.noTextile(text)
|
text = self.noTextile(text)
|
||||||
text = self.code(text)
|
text = self.code(text)
|
||||||
|
|
||||||
text = self.glyphs(text)
|
|
||||||
text = self.links(text)
|
text = self.links(text)
|
||||||
|
|
||||||
if not self.noimage:
|
if not self.noimage:
|
||||||
@ -804,6 +817,7 @@ class Textile(object):
|
|||||||
|
|
||||||
text = self.span(text)
|
text = self.span(text)
|
||||||
text = self.footnoteRef(text)
|
text = self.footnoteRef(text)
|
||||||
|
text = self.glyphs(text)
|
||||||
|
|
||||||
return text.rstrip('\n')
|
return text.rstrip('\n')
|
||||||
|
|
||||||
@ -814,6 +828,7 @@ class Textile(object):
|
|||||||
'fooobar ... and hello world ...'
|
'fooobar ... and hello world ...'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
text = self.glyphs_only(text)
|
||||||
punct = '!"#$%&\'*+,-./:;=?@\\^_`|~'
|
punct = '!"#$%&\'*+,-./:;=?@\\^_`|~'
|
||||||
|
|
||||||
pattern = r'''
|
pattern = r'''
|
||||||
@ -868,7 +883,7 @@ class Textile(object):
|
|||||||
>>> t.span(r"hello %(bob)span *strong* and **bold**% goodbye")
|
>>> t.span(r"hello %(bob)span *strong* and **bold**% goodbye")
|
||||||
'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
|
'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
|
||||||
"""
|
"""
|
||||||
qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^', r'&')
|
qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
|
||||||
pnct = ".,\"'?!;:"
|
pnct = ".,\"'?!;:"
|
||||||
|
|
||||||
for qtag in qtags:
|
for qtag in qtags:
|
||||||
@ -900,9 +915,7 @@ class Textile(object):
|
|||||||
'%' : 'span',
|
'%' : 'span',
|
||||||
'+' : 'ins',
|
'+' : 'ins',
|
||||||
'~' : 'sub',
|
'~' : 'sub',
|
||||||
'^' : 'sup',
|
'^' : 'sup'
|
||||||
'&' : 'span style="font-variant:small-caps;"'
|
|
||||||
# '&' : 'span style="font-transform:uppercase;font-size:smaller;"'
|
|
||||||
}
|
}
|
||||||
tag = qtags[tag]
|
tag = qtags[tag]
|
||||||
atts = self.pba(atts)
|
atts = self.pba(atts)
|
||||||
@ -1046,4 +1059,3 @@ def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
|
|||||||
return Textile(restricted=True, lite=lite,
|
return Textile(restricted=True, lite=lite,
|
||||||
noimage=noimage).textile(text, rel='nofollow',
|
noimage=noimage).textile(text, rel='nofollow',
|
||||||
html_type=html_type)
|
html_type=html_type)
|
||||||
|
|
||||||
|
@ -69,7 +69,8 @@ class TextileMLizer(OEB2HTML):
|
|||||||
txt = '%s' % t
|
txt = '%s' % t
|
||||||
if txt != '%':
|
if txt != '%':
|
||||||
text = re.sub(r'([^'+t+'|^\n])'+t+'\]\['+t+'([^'+t+'])', r'\1\2', text)
|
text = re.sub(r'([^'+t+'|^\n])'+t+'\]\['+t+'([^'+t+'])', r'\1\2', text)
|
||||||
text = re.sub(r'(\s|[*_])\[('+t+'[a-zA-Z0-9 \',.*_]+'+t+')\](\s|[*_])', r'\1\2\3', text)
|
text = re.sub(r'([^'+t+'|^\n])'+t+t+'([^'+t+'])', r'\1\2', text)
|
||||||
|
text = re.sub(r'(\s|[*_\'"])\[('+t+'[a-zA-Z0-9 \'",.*_]+'+t+')\](\s|[*_\'"?!,.])', r'\1\2\3', text)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
# Now tidy up links and ids - remove ones that don't have a corresponding opposite
|
# Now tidy up links and ids - remove ones that don't have a corresponding opposite
|
||||||
@ -77,14 +78,17 @@ class TextileMLizer(OEB2HTML):
|
|||||||
for i in self.our_links:
|
for i in self.our_links:
|
||||||
if i[0] == '#':
|
if i[0] == '#':
|
||||||
if i not in self.our_ids:
|
if i not in self.our_ids:
|
||||||
text = re.sub(r'"(.+)":'+i, '\1', text)
|
self.log.debug('Link has no target - %s ...' % i)
|
||||||
|
text = re.sub(r'"(.+)":'+i+'(\s)', r'\1\2', text)
|
||||||
for i in self.our_ids:
|
for i in self.our_ids:
|
||||||
if i not in self.our_links:
|
if i not in self.our_links:
|
||||||
text = re.sub(r'\('+i+'\)', '', text)
|
self.log.debug('ID has no link - %s ...' % i)
|
||||||
|
text = re.sub(r'%?\('+i+'\)\xa0?%?', r'', text)
|
||||||
|
|
||||||
# Note - I'm not checking for escaped '-' as this will also get hypenated words
|
# Remove obvious non-needed escaping, add sub/sup-script ones
|
||||||
text = check_escaping(text, ['\*', '_', '\+', '-'])
|
text = check_escaping(text, ['\*', '_', '\*'])
|
||||||
# text = check_escaping(text, ['\*', '_', '\+', '-'])
|
text = re.sub(r'(\w)([~^]\w+[~^])', r'\1[\2]', text) # escape the super/sub-scripts if needed
|
||||||
|
text = re.sub(r'([~^]\w+[~^])(\w)', r'[\1]\2', text) # escape the super/sub-scripts if needed
|
||||||
|
|
||||||
text = re.sub(r'%\xa0+', r'%', text) #remove empty spans
|
text = re.sub(r'%\xa0+', r'%', text) #remove empty spans
|
||||||
text = re.sub(r'%%', r'', text) #remove empty spans - MAY MERGE SOME ?
|
text = re.sub(r'%%', r'', text) #remove empty spans - MAY MERGE SOME ?
|
||||||
@ -96,13 +100,14 @@ class TextileMLizer(OEB2HTML):
|
|||||||
# text = re.sub(r'\n{4,}', r'\n\np. \n\n', text) #reduce blank lines + insert blank para
|
# text = re.sub(r'\n{4,}', r'\n\np. \n\n', text) #reduce blank lines + insert blank para
|
||||||
text = re.sub(r'\n{3}', r'\n\n', text) #reduce blank lines
|
text = re.sub(r'\n{3}', r'\n\n', text) #reduce blank lines
|
||||||
# text = re.sub(r' ((\* ?)+) ', r' ==\1== ', text)
|
# text = re.sub(r' ((\* ?)+) ', r' ==\1== ', text)
|
||||||
text = re.sub(u'%\n(p[<>=]{1,2}\.)', r'%\n\n\1', text)
|
text = re.sub(u'%\n(p[<>=]{1,2}\.|p\.)', r'%\n\n\1', text)
|
||||||
|
text = re.sub(r'\n\n {2,4}%', r'%', text) #Check span following blank para
|
||||||
text = re.sub(u'p[<>=]{1,2}\.\n\n?', r'', text)
|
text = re.sub(u'p[<>=]{1,2}\.\n\n?', r'', text)
|
||||||
text = re.sub(r'(^|\n)p\.\n', r'\1p. \n', text) # blank paragraph
|
text = re.sub(r'(^|\n)p\.\n', r'\1p. \n', text) # blank paragraph
|
||||||
text = re.sub(u'\n\xa0', r'\np. ', text) # blank paragraph
|
text = re.sub(u'\n\xa0', r'\np. ', text) # blank paragraph
|
||||||
text = re.sub(u'\np[<>=]{1,2}?\. \xa0', r'\np. ', text) # blank paragraph
|
text = re.sub(u'\np[<>=]{1,2}?\. \xa0', r'\np. ', text) # blank paragraph
|
||||||
text = re.sub(r'(^|\n)(p.*\. ?\n)(p.*\.)', r'\1\3', text)
|
text = re.sub(r'(^|\n)(p.*\. ?\n)(p.*\.)', r'\1\3', text)
|
||||||
text = re.sub(r'\n(p\. \n)(p.*\.)', r'\n\2', text)
|
text = re.sub(r'\n(p\. \n)(p.*\.|h.*\.)', r'\n\2', text)
|
||||||
text = re.sub(r' {2,}\|', r' |', text) #sort out spaces in tables
|
text = re.sub(r' {2,}\|', r' |', text) #sort out spaces in tables
|
||||||
|
|
||||||
# Now put back spaces removed earlier as they're needed here
|
# Now put back spaces removed earlier as they're needed here
|
||||||
@ -193,7 +198,8 @@ class TextileMLizer(OEB2HTML):
|
|||||||
return txt
|
return txt
|
||||||
|
|
||||||
def prepare_string_for_textile(self, txt):
|
def prepare_string_for_textile(self, txt):
|
||||||
if re.search(r'(\s([*&_+\-=~@%|]|\?{2}))|(([*&_+\-=~@%|]|\?{2})\s)', txt):
|
# if re.search(r'(\s([*&_+\-~@%|]|\?{2}))|(([*&_+\-~@%|]|\?{2})\s)', txt):
|
||||||
|
if re.search(r'(\s([*&_+\-~@%|]|\?{2})\S)|(\S([*&_+\-~@%|]|\?{2})\s)', txt):
|
||||||
return ' ==%s== ' % txt
|
return ' ==%s== ' % txt
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
@ -240,6 +246,10 @@ class TextileMLizer(OEB2HTML):
|
|||||||
if style['font-style'] == 'italic' or tag in ('i', 'em'):
|
if style['font-style'] == 'italic' or tag in ('i', 'em'):
|
||||||
if tag not in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite'):
|
if tag not in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite'):
|
||||||
if self.style_italic == False:
|
if self.style_italic == False:
|
||||||
|
if self.in_a_link:
|
||||||
|
text.append('_')
|
||||||
|
tags.append('_')
|
||||||
|
else:
|
||||||
text.append('[_')
|
text.append('[_')
|
||||||
tags.append('_]')
|
tags.append('_]')
|
||||||
self.style_embed.append('_')
|
self.style_embed.append('_')
|
||||||
@ -247,6 +257,10 @@ class TextileMLizer(OEB2HTML):
|
|||||||
if style['font-weight'] in ('bold', 'bolder') or tag in ('b', 'strong'):
|
if style['font-weight'] in ('bold', 'bolder') or tag in ('b', 'strong'):
|
||||||
if tag not in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'th'):
|
if tag not in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'th'):
|
||||||
if self.style_bold == False:
|
if self.style_bold == False:
|
||||||
|
if self.in_a_link:
|
||||||
|
text.append('*')
|
||||||
|
tags.append('*')
|
||||||
|
else:
|
||||||
text.append('[*')
|
text.append('[*')
|
||||||
tags.append('*]')
|
tags.append('*]')
|
||||||
self.style_embed.append('*')
|
self.style_embed.append('*')
|
||||||
@ -304,14 +318,17 @@ class TextileMLizer(OEB2HTML):
|
|||||||
tags.append('pre\n')
|
tags.append('pre\n')
|
||||||
elif tag == 'a':
|
elif tag == 'a':
|
||||||
if self.opts.keep_links:
|
if self.opts.keep_links:
|
||||||
|
if attribs.has_key('href'):
|
||||||
text.append('"')
|
text.append('"')
|
||||||
tags.append('a')
|
tags.append('a')
|
||||||
if attribs.has_key('href'):
|
|
||||||
tags.append('":' + attribs['href'])
|
tags.append('":' + attribs['href'])
|
||||||
self.our_links.append(attribs['href'])
|
self.our_links.append(attribs['href'])
|
||||||
if attribs.has_key('title'):
|
if attribs.has_key('title'):
|
||||||
tags.append('(' + attribs['title'] + ')')
|
tags.append('(' + attribs['title'] + ')')
|
||||||
self.in_a_link = True
|
self.in_a_link = True
|
||||||
|
else:
|
||||||
|
text.append('%')
|
||||||
|
tags.append('%')
|
||||||
elif tag == 'img':
|
elif tag == 'img':
|
||||||
if self.opts.keep_image_references:
|
if self.opts.keep_image_references:
|
||||||
txt = '!' + self.check_halign(style)
|
txt = '!' + self.check_halign(style)
|
||||||
@ -432,9 +449,9 @@ class TextileMLizer(OEB2HTML):
|
|||||||
t = ''
|
t = ''
|
||||||
text.append(self.id_no_text)
|
text.append(self.id_no_text)
|
||||||
self.id_no_text = u''
|
self.id_no_text = u''
|
||||||
if t == '*]':
|
if t in ('*]', '*'):
|
||||||
self.style_bold = False
|
self.style_bold = False
|
||||||
elif t == '_]':
|
elif t in ('_]', '_'):
|
||||||
self.style_italic = False
|
self.style_italic = False
|
||||||
elif t == '+]':
|
elif t == '+]':
|
||||||
self.style_under = False
|
self.style_under = False
|
||||||
@ -442,7 +459,7 @@ class TextileMLizer(OEB2HTML):
|
|||||||
self.style_strike = False
|
self.style_strike = False
|
||||||
elif t == '&':
|
elif t == '&':
|
||||||
self.style_smallcap = False
|
self.style_smallcap = False
|
||||||
if t in ('*]', '_]', '+]', '-]'):
|
if t in ('*]', '_]', '+]', '-]', '*', '_'):
|
||||||
txt = self.style_embed.pop()
|
txt = self.style_embed.pop()
|
||||||
text.append('%s' % t)
|
text.append('%s' % t)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user