TXT Input: Textile: Simplify code for handling macros and glyphs.

This commit is contained in:
John Schember 2011-03-19 15:32:05 -04:00
parent bfd2192221
commit a300879f70

View File

@ -121,97 +121,113 @@ class Textile(object):
btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p') btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
btag_lite = ('bq', 'bc', 'p') btag_lite = ('bq', 'bc', 'p')
glyph_defaults = ( macro_defaults = [
('mac_cent', '¢'), (re.compile(r'{(c\||\|c)}'), r'¢'), # cent
('mac_pound', '£'), (re.compile(r'{(L-|-L)}'), r'£'), # pound
('mac_yen', '¥'), (re.compile(r'{(Y=|=Y)}'), r'¥'), # yen
('mac_quarter', '¼'), (re.compile(r'{\(c\)}'), r'©'), # copyright
('mac_half', '½'), (re.compile(r'{\(r\)}'), r'®'), # registered
('mac_three-quarter', '¾'), (re.compile(r'{(\+_|_\+)}'), r'±'), # plus-minus
('mac_cA-grave', 'À'), (re.compile(r'{1/4}'), r'¼'), # quarter
('mac_cA-acute', 'Á'), (re.compile(r'{1/2}'), r'½'), # half
('mac_cA-circumflex', 'Â'), (re.compile(r'{3/4}'), r'¾'), # three-quarter
('mac_cA-tilde', 'Ã'), (re.compile(r'{(A`|`A)}'), r'À'), # A-grave
('mac_cA-diaeresis', 'Ä'), (re.compile(r'{(A\'|\'A)}'), r'Á'), # A-acute
('mac_cA-ring', 'Å'), (re.compile(r'{(A\^|\^A)}'), r'Â'), # A-circumflex
('mac_cAE', 'Æ'), (re.compile(r'{(A~|~A)}'), r'Ã'), # A-tilde
('mac_cC-cedilla', 'Ç'), (re.compile(r'{(A\"|\"A)}'), r'Ä'), # A-diaeresis
('mac_cE-grave', 'È'), (re.compile(r'{(Ao|oA)}'), r'Å'), # A-ring
('mac_cE-acute', 'É'), (re.compile(r'{(AE)}'), r'Æ'), # AE
('mac_cE-circumflex', 'Ê'), (re.compile(r'{(C,|,C)}'), r'Ç'), # C-cedilla
('mac_cE-diaeresis', 'Ë'), (re.compile(r'{(E`|`E)}'), r'È'), # E-grave
('mac_cI-grave', 'Ì'), (re.compile(r'{(E\'|\'E)}'), r'É'), # E-acute
('mac_cI-acute', 'Í'), (re.compile(r'{(E\^|\^E)}'), r'Ê'), # E-circumflex
('mac_cI-circumflex', 'Î'), (re.compile(r'{(E\"|\"E)}'), r'Ë'), # E-diaeresis
('mac_cI-diaeresis', 'Ï'), (re.compile(r'{(I`|`I)}'), r'Ì'), # I-grave
('mac_cEth', 'Ð'), (re.compile(r'{(I\'|\'I)}'), r'Í'), # I-acute
('mac_cN-tilde', 'Ñ'), (re.compile(r'{(I\^|\^I)}'), r'Î'), # I-circumflex
('mac_cO-grave', 'Ò'), (re.compile(r'{(I\"|\"I)}'), r'Ï'), # I-diaeresis
('mac_cO-acute', 'Ó'), (re.compile(r'{(D-|-D)}'), r'Ð'), # ETH
('mac_cO-circumflex', 'Ô'), (re.compile(r'{(N~|~N)}'), r'Ñ'), # N-tilde
('mac_cO-tilde', 'Õ'), (re.compile(r'{(O`|`O)}'), r'Ò'), # O-grave
('mac_cO-diaeresis', 'Ö'), (re.compile(r'{(O\'|\'O)}'), r'Ó'), # O-acute
('mac_cO-stroke', 'Ø'), (re.compile(r'{(O\^|\^O)}'), r'Ô'), # O-circumflex
('mac_cU-grave', 'Ù'), (re.compile(r'{(O~|~O)}'), r'Õ'), # O-tilde
('mac_cU-acute', 'Ú'), (re.compile(r'{(O\"|\"O)}'), r'Ö'), # O-diaeresis
('mac_cU-circumflex', 'Û'), (re.compile(r'{x}'), r'×'), # dimension
('mac_cU-diaeresis', 'Ü'), (re.compile(r'{(O\/|\/O)}'), r'Ø'), # O-slash
('mac_cY-acute', 'Ý'), (re.compile(r'{(U`|`U)}'), r'Ù'), # U-grave
('mac_sa-grave', 'à'), (re.compile(r'{(U\'|\'U)}'), r'Ú'), # U-acute
('mac_sa-acute', 'á'), (re.compile(r'{(U\^|\^U)}'), r'Û'), # U-circumflex
('mac_sa-circumflex', 'â'), (re.compile(r'{(U\"|\"U)}'), r'Ü'), # U-diaeresis
('mac_sa-tilde', 'ã'), (re.compile(r'{(Y\'|\'Y)}'), r'Ý'), # Y-acute
('mac_sa-diaeresis', 'ä'), (re.compile(r'{sz}'), r'ß'), # sharp-s
('mac_sa-ring', 'å'), (re.compile(r'{(a`|`a)}'), r'à'), # a-grave
('mac_sae', 'æ'), (re.compile(r'{(a\'|\'a)}'), r'á'), # a-acute
('mac_sc-cedilla', 'ç'), (re.compile(r'{(a\^|\^a)}'), r'â'), # a-circumflex
('mac_se-grave', 'è'), (re.compile(r'{(a~|~a)}'), r'ã'), # a-tilde
('mac_se-acute', 'é'), (re.compile(r'{(a\"|\"a)}'), r'ä'), # a-diaeresis
('mac_se-circumflex', 'ê'), (re.compile(r'{(ao|oa)}'), r'å'), # a-ring
('mac_se-diaeresis', 'ë'), (re.compile(r'{ae}'), r'æ'), # ae
('mac_si-grave', 'ì'), (re.compile(r'{(c,|,c)}'), r'ç'), # c-cedilla
('mac_si-acute', 'í'), (re.compile(r'{(e`|`e)}'), r'è'), # e-grave
('mac_si-circumflex', 'î'), (re.compile(r'{(e\'|\'e)}'), r'é'), # e-acute
('mac_si-diaeresis', 'ï'), (re.compile(r'{(e\^|\^e)}'), r'ê'), # e-circumflex
('mac_sn-tilde', 'ñ'), (re.compile(r'{(e\"|\"e)}'), r'ë'), # e-diaeresis
('mac_so-grave', 'ò'), (re.compile(r'{(i`|`i)}'), r'ì'), # i-grave
('mac_so-acute', 'ó'), (re.compile(r'{(i\'|\'i)}'), r'í'), # i-acute
('mac_so-circumflex', 'ô'), (re.compile(r'{(i\^|\^i)}'), r'î'), # i-circumflex
('mac_so-tilde', 'õ'), (re.compile(r'{(i\"|\"i)}'), r'ï'), # i-diaeresis
('mac_so-diaeresis', 'ö'), (re.compile(r'{(d-|-d)}'), r'ð'), # eth
('mac_so-stroke', 'ø'), (re.compile(r'{(n~|~n)}'), r'ñ'), # n-tilde
('mac_su-grave', 'ù'), (re.compile(r'{(o`|`o)}'), r'ò'), # o-grave
('mac_su-acute', 'ú'), (re.compile(r'{(o\'|\'o)}'), r'ó'), # o-acute
('mac_su-circumflex', 'û'), (re.compile(r'{(o\^|\^o)}'), r'ô'), # o-circumflex
('mac_su-diaeresis', 'ü'), (re.compile(r'{(o~|~o)}'), r'õ'), # o-tilde
('mac_sy-acute', 'ý'), (re.compile(r'{(o\"|\"o)}'), r'ö'), # o-diaeresis
('mac_sy-diaeresis', 'ÿ'), (re.compile(r'{(o\/|\/o)}'), r'ø'), # o-stroke
('mac_cOE', 'Œ'), (re.compile(r'{(u`|`u)}'), r'ù'), # u-grave
('mac_soe', 'œ'), (re.compile(r'{(u\'|\'u)}'), r'ú'), # u-acute
('mac_bullet', '•'), (re.compile(r'{(u\^|\^u)}'), r'û'), # u-circumflex
('mac_franc', '₣'), (re.compile(r'{(u\"|\"u)}'), r'ü'), # u-diaeresis
('mac_lira', '₤'), (re.compile(r'{(y\'|\'y)}'), r'ý'), # y-acute
('mac_rupee', '₨'), (re.compile(r'{(y\"|\"y)}'), r'ÿ'), # y-diaeresis
('mac_euro', '€'), (re.compile(r'{OE}'), r'Œ'), # OE
('mac_spade', '♠'), (re.compile(r'{oe}'), r'œ'), # oe
('mac_club', '♣'), (re.compile(r'{(S\^|\^S)}'), r'Š'), # Scaron
('mac_heart', '♥'), (re.compile(r'{(s\^|\^s)}'), r'š'), # scaron
('mac_diamond', '♦'), (re.compile(r'{\*}'), r'•'), # bullet
('txt_dimension', '×'), (re.compile(r'{Fr}'), r'₣'), # Franc
('txt_quote_single_open', '‘'), (re.compile(r'{(L=|=L)}'), r'₤'), # Lira
('txt_quote_single_close', '’'), (re.compile(r'{Rs}'), r'₨'), # Rupee
('txt_quote_double_open', '“'), (re.compile(r'{(C=|=C)}'), r'€'), # euro
('txt_quote_double_close', '”'), (re.compile(r'{tm}'), r'™'), # trademark
('txt_apostrophe', '’'), (re.compile(r'{spade}'), r'♠'), # spade
('txt_prime', '′'), (re.compile(r'{club}'), r'♣'), # club
('txt_prime_double', '″'), (re.compile(r'{heart}'), r'♥'), # heart
('txt_ellipsis', '…'), (re.compile(r'{diamond}'), r'♦'), # diamond
('txt_emdash', '—'), ]
('txt_endash', '–'), glyph_defaults = [
('txt_trademark', '™'), (re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2×\3'), # dimension sign
('txt_registered', '®'), (re.compile(r'(\d+)\'', re.I), r'\1′'), # prime
('txt_copyright', '©'), (re.compile(r'(\d+)\"', re.I), r'\1″'), # prime-double
) (re.compile(r"(\w)\'(\w)"), r'\1’\2'), # apostrophe's
(re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), r'\1’\2'), # back in '88
(re.compile(r'(\S)\'(?=\s|\'|<|$)'), r'\1&#8217;'), # single closing
(re.compile(r'\'/'), r'&#8216;'), # single opening
(re.compile(r'(\")\"'), r'\1&#8221;'), # double closing - following another
(re.compile(r'(\S)\"(?=\s|\"|<|$)'), r'\1&#8221;'), # double closing
(re.compile(r'"'), r'&#8220;'), # double opening
(re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
(re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
(re.compile(r'\b(\s{0,1})?\.{3}'), r'\1&#8260;'), # ellipsis
(re.compile(r'(\s?)--(\s?)'), r'\1&#8212;\2'), # em dash
(re.compile(r'\s-(?:\s|$)'), r' &#8211; '), # en dash
(re.compile(r'\b( ?)[([]TM[])]', re.I), r'\1&#8482;'), # trademark
(re.compile(r'\b( ?)[([]R[])]', re.I), r'\1&#174;'), # registered
(re.compile(r'\b( ?)[([]C[])]', re.I), r'\1&#169;'), # copyright
]
def __init__(self, restricted=False, lite=False, noimage=False): def __init__(self, restricted=False, lite=False, noimage=False):
"""docstring for __init__""" """docstring for __init__"""
@ -673,211 +689,15 @@ class Textile(object):
# fix: hackish # fix: hackish
text = re.sub(r'"\Z', '\" ', text) text = re.sub(r'"\Z', '\" ', text)
glyph_search = (
re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), # dimension sign
re.compile(r"(\w)\'(\w)"), # apostrophe's
re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88
re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing
re.compile(r'\'/'), # single opening
re.compile(r'(\")\"'), # double closing - following another
re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing
re.compile(r'"'), # double opening
re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), # 3+ uppercase acronym
re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), # 3+ uppercase
re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis
re.compile(r'(\s?)--(\s?)'), # em dash
re.compile(r'\s-(?:\s|$)'), # en dash
re.compile(r'\b( ?)[([]TM[])]', re.I), # trademark
re.compile(r'\b( ?)[([]R[])]', re.I), # registered
re.compile(r'\b( ?)[([]C[])]', re.I) # copyright
)
glyph_replace = [x % dict(self.glyph_defaults) for x in (
r'\1\2%(txt_dimension)s\3', # dimension sign
r'\1%(txt_apostrophe)s\2', # apostrophe's
r'\1%(txt_apostrophe)s\2', # back in '88
r'\1%(txt_quote_single_close)s', # single closing
r'%(txt_quote_single_open)s', # single opening
r'\1%(txt_quote_double_close)s', # double closing - following another
r'\1%(txt_quote_double_close)s', # double closing
r'%(txt_quote_double_open)s', # double opening
r'<acronym title="\2">\1</acronym>', # 3+ uppercase acronym
r'<span class="caps">\1</span>', # 3+ uppercase
r'\1%(txt_ellipsis)s', # ellipsis
r'\1%(txt_emdash)s\2', # em dash
r' %(txt_endash)s ', # en dash
r'\1%(txt_trademark)s', # trademark
r'\1%(txt_registered)s', # registered
r'\1%(txt_copyright)s' # copyright
)]
if re.search(r'{.+?}', text):
glyph_search += (
re.compile(r'{(c\||\|c)}'), # cent
re.compile(r'{(L-|-L)}'), # pound
re.compile(r'{(Y=|=Y)}'), # yen
re.compile(r'{\(c\)}'), # copyright
re.compile(r'{\(r\)}'), # registered
re.compile(r'{1/4}'), # quarter
re.compile(r'{1/2}'), # half
re.compile(r'{3/4}'), # three-quarter
re.compile(r'{(A`|`A)}'), # 192;
re.compile(r'{(A\'|\'A)}'), # 193;
re.compile(r'{(A\^|\^A)}'), # 194;
re.compile(r'{(A~|~A)}'), # 195;
re.compile(r'{(A\"|\"A)}'), # 196;
re.compile(r'{(Ao|oA)}'), # 197;
re.compile(r'{(AE)}'), # 198;
re.compile(r'{(C,|,C)}'), # 199;
re.compile(r'{(E`|`E)}'), # 200;
re.compile(r'{(E\'|\'E)}'), # 201;
re.compile(r'{(E\^|\^E)}'), # 202;
re.compile(r'{(E\"|\"E)}'), # 203;
re.compile(r'{(I`|`I)}'), # 204;
re.compile(r'{(I\'|\'I)}'), # 205;
re.compile(r'{(I\^|\^I)}'), # 206;
re.compile(r'{(I\"|\"I)}'), # 207;
re.compile(r'{(D-|-D)}'), # 208;
re.compile(r'{(N~|~N)}'), # 209;
re.compile(r'{(O`|`O)}'), # 210;
re.compile(r'{(O\'|\'O)}'), # 211;
re.compile(r'{(O\^|\^O)}'), # 212;
re.compile(r'{(O~|~O)}'), # 213;
re.compile(r'{(O\"|\"O)}'), # 214;
re.compile(r'{(O\/|\/O)}'), # 215;
re.compile(r'{(U`|`U)}'), # 216;
re.compile(r'{(U\'|\'U)}'), # 217;
re.compile(r'{(U\^|\^U)}'), # 218;
re.compile(r'{(U\"|\"U)}'), # 219;
re.compile(r'{(Y\'|\'Y)}'), # 220;
re.compile(r'{(a`|`a)}'), # a-grace
re.compile(r'{(a\'|\'a)}'), # a-acute
re.compile(r'{(a\^|\^a)}'), # a-circumflex
re.compile(r'{(a~|~a)}'), # a-tilde
re.compile(r'{(a\"|\"a)}'), # a-diaeresis
re.compile(r'{(ao|oa)}'), # a-ring
re.compile(r'{ae}'), # ae
re.compile(r'{(c,|,c)}'), # c-cedilla
re.compile(r'{(e`|`e)}'), # e-grace
re.compile(r'{(e\'|\'e)}'), # e-acute
re.compile(r'{(e\^|\^e)}'), # e-circumflex
re.compile(r'{(e\"|\"e)}'), # e-diaeresis
re.compile(r'{(i`|`i)}'), # i-grace
re.compile(r'{(i\'|\'i)}'), # i-acute
re.compile(r'{(i\^|\^i)}'), # i-circumflex
re.compile(r'{(i\"|\"i)}'), # i-diaeresis
re.compile(r'{(n~|~n)}'), # n-tilde
re.compile(r'{(o`|`o)}'), # o-grace
re.compile(r'{(o\'|\'o)}'), # o-acute
re.compile(r'{(o\^|\^o)}'), # o-circumflex
re.compile(r'{(o~|~o)}'), # o-tilde
re.compile(r'{(o\"|\"o)}'), # o-diaeresis
re.compile(r'{(o\/|\/o)}'), # o-stroke
re.compile(r'{(u`|`u)}'), # u-grace
re.compile(r'{(u\'|\'u)}'), # u-acute
re.compile(r'{(u\^|\^u)}'), # u-circumflex
re.compile(r'{(u\"|\"u)}'), # u-diaeresis
re.compile(r'{(y\'|\'y)}'), # y-acute
re.compile(r'{(y\"|\"y)}'), # y-diaeresis
re.compile(r'{OE}'), # y-diaeresis
re.compile(r'{oe}'), # y-diaeresis
re.compile(r'{\*}'), # bullet
re.compile(r'{Fr}'), # Franc
re.compile(r'{(L=|=L)}'), # Lira
re.compile(r'{Rs}'), # Rupee
re.compile(r'{(C=|=C)}'), # euro
re.compile(r'{tm}'), # euro
re.compile(r'{spade}'), # spade
re.compile(r'{club}'), # club
re.compile(r'{heart}'), # heart
re.compile(r'{diamond}') # diamond
)
glyph_replace += [x % dict(self.glyph_defaults) for x in (
r'%(mac_cent)s', # cent
r'%(mac_pound)s', # pound
r'%(mac_yen)s', # yen
r'%(txt_copyright)s', # copyright
r'%(txt_registered)s', # registered
r'%(mac_quarter)s', # quarter
r'%(mac_half)s', # half
r'%(mac_three-quarter)s', # three-quarter
r'%(mac_cA-grave)s', # 192;
r'%(mac_cA-acute)s', # 193;
r'%(mac_cA-circumflex)s', # 194;
r'%(mac_cA-tilde)s', # 195;
r'%(mac_cA-diaeresis)s', # 196;
r'%(mac_cA-ring)s', # 197;
r'%(mac_cAE)s', # 198;
r'%(mac_cC-cedilla)s', # 199;
r'%(mac_cE-grave)s', # 200;
r'%(mac_cE-acute)s', # 201;
r'%(mac_cE-circumflex)s', # 202;
r'%(mac_cE-diaeresis)s', # 203;
r'%(mac_cI-grave)s', # 204;
r'%(mac_cI-acute)s', # 205;
r'%(mac_cI-circumflex)s', # 206;
r'%(mac_cI-diaeresis)s', # 207;
r'%(mac_cEth)s', # 208;
r'%(mac_cN-tilde)s', # 209;
r'%(mac_cO-grave)s', # 210;
r'%(mac_cO-acute)s', # 211;
r'%(mac_cO-circumflex)s', # 212;
r'%(mac_cO-tilde)s', # 213;
r'%(mac_cO-diaeresis)s', # 214;
r'%(mac_cO-stroke)s', # 216;
r'%(mac_cU-grave)s', # 217;
r'%(mac_cU-acute)s', # 218;
r'%(mac_cU-circumflex)s', # 219;
r'%(mac_cU-diaeresis)s', # 220;
r'%(mac_cY-acute)s', # 221;
r'%(mac_sa-grave)s', # 224;
r'%(mac_sa-acute)s', # 225;
r'%(mac_sa-circumflex)s', # 226;
r'%(mac_sa-tilde)s', # 227;
r'%(mac_sa-diaeresis)s', # 228;
r'%(mac_sa-ring)s', # 229;
r'%(mac_sae)s', # 230;
r'%(mac_sc-cedilla)s', # 231;
r'%(mac_se-grave)s', # 232;
r'%(mac_se-acute)s', # 233;
r'%(mac_se-circumflex)s', # 234;
r'%(mac_se-diaeresis)s', # 235;
r'%(mac_si-grave)s', # 236;
r'%(mac_si-acute)s', # 237;
r'%(mac_si-circumflex)s', # 238;
r'%(mac_si-diaeresis)s', # 239;
r'%(mac_sn-tilde)s', # 241;
r'%(mac_so-grave)s', # 242;
r'%(mac_so-acute)s', # 243;
r'%(mac_so-circumflex)s', # 244;
r'%(mac_so-tilde)s', # 245;
r'%(mac_so-diaeresis)s', # 246;
r'%(mac_so-stroke)s', # 248;
r'%(mac_su-grave)s', # 249;
r'%(mac_su-acute)s', # 250;
r'%(mac_su-circumflex)s', # 251;
r'%(mac_su-diaeresis)s', # 252;
r'%(mac_sy-acute)s', # 253;
r'%(mac_sy-diaeresis)s', # 255;
r'%(mac_cOE)s', # 338;
r'%(mac_soe)s', # 339;
r'%(mac_bullet)s', # bullet
r'%(mac_franc)s', # franc
r'%(mac_lira)s', # lira
r'%(mac_rupee)s', # rupee
r'%(mac_euro)s', # euro
r'%(txt_trademark)s', # trademark
r'%(mac_spade)s', # spade
r'%(mac_club)s', # club
r'%(mac_heart)s', # heart
r'%(mac_diamond)s' # diamond
)]
result = [] result = []
for line in re.compile(r'(<.*?>)', re.U).split(text): for line in re.compile(r'(<.*?>)', re.U).split(text):
if not re.search(r'<.*>', line): if not re.search(r'<.*>', line):
for s, r in zip(glyph_search, glyph_replace): rules = []
if re.search(r'{.+?}', line):
rules = self.macro_defaults + self.glyph_defaults
else:
rules = self.glyph_defaults
for s, r in rules:
line = s.sub(r, line) line = s.sub(r, line)
result.append(line) result.append(line)
return ''.join(result) return ''.join(result)
@ -1045,7 +865,7 @@ class Textile(object):
'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye' 'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
""" """
qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^') qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
pnct = ".,\"'?!;:" pnct = ".,\"'?!;:()"
for qtag in qtags: for qtag in qtags:
pattern = re.compile(r""" pattern = re.compile(r"""