mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
TXT Input: Textile: Rely on SmartyPants to handle quotes.
This commit is contained in:
parent
2d46e35a6c
commit
15fa4f71c4
@ -211,16 +211,6 @@ class Textile(object):
|
|||||||
(re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2×\3'), # dimension sign
|
(re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2×\3'), # dimension sign
|
||||||
(re.compile(r'(\d+)\'', re.I), r'\1′'), # prime
|
(re.compile(r'(\d+)\'', re.I), r'\1′'), # prime
|
||||||
(re.compile(r'(\d+)\"', re.I), r'\1″'), # prime-double
|
(re.compile(r'(\d+)\"', re.I), r'\1″'), # prime-double
|
||||||
(re.compile(r'(\')\''), r'\1’'), # single closing - following another
|
|
||||||
(re.compile(r"(\w)\'(\w)"), r'\1’\2'), # apostrophe's
|
|
||||||
(re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), r'\1’\2'), # back in '88
|
|
||||||
(re.compile(r'(\s\[)\''), r'\1‘'), # single opening - following ws+[
|
|
||||||
(re.compile(r'(\S)\'(?=\s|'+pnct+'|<|$)', re.M), r'\1’'), # single closing
|
|
||||||
(re.compile(r'\''), r'‘'), # single opening
|
|
||||||
(re.compile(r'(\")\"'), r'\1”'), # double closing - following another
|
|
||||||
(re.compile(r'(\s\[)\"'), r'\1“'), # double opening - following whitespace+[
|
|
||||||
(re.compile(r'(\S)\"(?=\s|'+pnct+'|<|$)', re.M), r'\1”'), # double closing
|
|
||||||
(re.compile(r'"'), r'“'), # double opening
|
|
||||||
(re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
|
(re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
|
||||||
(re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
|
(re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
|
||||||
(re.compile(r'\b(\s{0,1})?\.{3}'), r'\1…'), # ellipsis
|
(re.compile(r'\b(\s{0,1})?\.{3}'), r'\1…'), # ellipsis
|
||||||
@ -870,11 +860,11 @@ class Textile(object):
|
|||||||
'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
|
'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
|
||||||
"""
|
"""
|
||||||
qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
|
qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
|
||||||
pnct = ".,\"'?!;:()"
|
pnct = ".,\"'?!;:"
|
||||||
|
|
||||||
for qtag in qtags:
|
for qtag in qtags:
|
||||||
pattern = re.compile(r"""
|
pattern = re.compile(r"""
|
||||||
(?:^|(?<=[\s>%(pnct)s])|\[|([\]}]))
|
(?:^|(?<=[\s>%(pnct)s\(])|\[|([\]}]))
|
||||||
(%(qtag)s)(?!%(qtag)s)
|
(%(qtag)s)(?!%(qtag)s)
|
||||||
(%(c)s)
|
(%(c)s)
|
||||||
(?::(\S+))?
|
(?::(\S+))?
|
||||||
|
@ -165,6 +165,7 @@ class TXTInput(InputFormatPlugin):
|
|||||||
elif options.formatting_type == 'textile':
|
elif options.formatting_type == 'textile':
|
||||||
log.debug('Running text through textile conversion...')
|
log.debug('Running text through textile conversion...')
|
||||||
html = convert_textile(txt)
|
html = convert_textile(txt)
|
||||||
|
setattr(options, 'smarten_punctuation', True)
|
||||||
else:
|
else:
|
||||||
log.debug('Running text through basic conversion...')
|
log.debug('Running text through basic conversion...')
|
||||||
flow_size = getattr(options, 'flow_size', 0)
|
flow_size = getattr(options, 'flow_size', 0)
|
||||||
|
@ -584,6 +584,12 @@ def educateQuotes(str):
|
|||||||
# <p>He said, "'Quoted' words in a larger quote."</p>
|
# <p>He said, "'Quoted' words in a larger quote."</p>
|
||||||
str = re.sub(r""""'(?=\w)""", """“‘""", str)
|
str = re.sub(r""""'(?=\w)""", """“‘""", str)
|
||||||
str = re.sub(r"""'"(?=\w)""", """‘“""", str)
|
str = re.sub(r"""'"(?=\w)""", """‘“""", str)
|
||||||
|
str = re.sub(r'''""(?=\w)''', """““""", str)
|
||||||
|
str = re.sub(r"""''(?=\w)""", """‘‘""", str)
|
||||||
|
str = re.sub(r'''\"\'''', """”’""", str)
|
||||||
|
str = re.sub(r'''\'\"''', """’”""", str)
|
||||||
|
str = re.sub(r'''""''', """””""", str)
|
||||||
|
str = re.sub(r"""''""", """’’""", str)
|
||||||
|
|
||||||
# Special case for decade abbreviations (the '80s):
|
# Special case for decade abbreviations (the '80s):
|
||||||
str = re.sub(r"""\b'(?=\d{2}s)""", r"""’""", str)
|
str = re.sub(r"""\b'(?=\d{2}s)""", r"""’""", str)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user