From 811d18d868d57db229e2402347789984e1e213de Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 17 Mar 2011 21:15:25 -0400 Subject: [PATCH 1/2] TXT Input: Textile: Fix some replacments. Add {macro} definitions submitted by Perkin. --- src/calibre/ebooks/textile/functions.py | 284 ++++++++++++++++++++++-- 1 file changed, 263 insertions(+), 21 deletions(-) mode change 100644 => 100755 src/calibre/ebooks/textile/functions.py diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py old mode 100644 new mode 100755 index ec675b9b62..b46384ae93 --- a/src/calibre/ebooks/textile/functions.py +++ b/src/calibre/ebooks/textile/functions.py @@ -10,6 +10,8 @@ __version__ = '2.1.4' __date__ = '2009/12/04' __copyright__ = """ +Copyright (c) 2011, Leigh Parry +Copyright (c) 2011, John Schember Copyright (c) 2009, Jason Samsa, http://jsamsa.com/ Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/ Copyright (c) 2003, Mark Pilgrim, http://diveintomark.org/ @@ -120,6 +122,82 @@ class Textile(object): btag_lite = ('bq', 'bc', 'p') glyph_defaults = ( + ('mac_cent', '¢'), + ('mac_pound', '£'), + ('mac_yen', '¥'), + ('mac_quarter', '¼'), + ('mac_half', '½'), + ('mac_three-quarter', '¾'), + ('mac_cA-grave', 'À'), + ('mac_cA-acute', 'Á'), + ('mac_cA-circumflex', 'Â'), + ('mac_cA-tilde', 'Ã'), + ('mac_cA-diaeresis', 'Ä'), + ('mac_cA-ring', 'Å'), + ('mac_cAE', 'Æ'), + ('mac_cC-cedilla', 'Ç'), + ('mac_cE-grave', 'È'), + ('mac_cE-acute', 'É'), + ('mac_cE-circumflex', 'Ê'), + ('mac_cE-diaeresis', 'Ë'), + ('mac_cI-grave', 'Ì'), + ('mac_cI-acute', 'Í'), + ('mac_cI-circumflex', 'Î'), + ('mac_cI-diaeresis', 'Ï'), + ('mac_cEth', 'Ð'), + ('mac_cN-tilde', 'Ñ'), + ('mac_cO-grave', 'Ò'), + ('mac_cO-acute', 'Ó'), + ('mac_cO-circumflex', 'Ô'), + ('mac_cO-tilde', 'Õ'), + ('mac_cO-diaeresis', 'Ö'), + ('mac_cO-stroke', 'Ø'), + ('mac_cU-grave', 'Ù'), + ('mac_cU-acute', 'Ú'), + ('mac_cU-circumflex', 'Û'), + ('mac_cU-diaeresis', 'Ü'), + ('mac_cY-acute', 
'Ý'), + ('mac_sa-grave', 'à'), + ('mac_sa-acute', 'á'), + ('mac_sa-circumflex', 'â'), + ('mac_sa-tilde', 'ã'), + ('mac_sa-diaeresis', 'ä'), + ('mac_sa-ring', 'å'), + ('mac_sae', 'æ'), + ('mac_sc-cedilla', 'ç'), + ('mac_se-grave', 'è'), + ('mac_se-acute', 'é'), + ('mac_se-circumflex', 'ê'), + ('mac_se-diaeresis', 'ë'), + ('mac_si-grave', 'ì'), + ('mac_si-acute', 'í'), + ('mac_si-circumflex', 'î'), + ('mac_si-diaeresis', 'ï'), + ('mac_sn-tilde', 'ñ'), + ('mac_so-grave', 'ò'), + ('mac_so-acute', 'ó'), + ('mac_so-circumflex', 'ô'), + ('mac_so-tilde', 'õ'), + ('mac_so-diaeresis', 'ö'), + ('mac_so-stroke', 'ø'), + ('mac_su-grave', 'ù'), + ('mac_su-acute', 'ú'), + ('mac_su-circumflex', 'û'), + ('mac_su-diaeresis', 'ü'), + ('mac_sy-acute', 'ý'), + ('mac_sy-diaeresis', 'ÿ'), + ('mac_cOE', 'Œ'), + ('mac_soe', 'œ'), + ('mac_bullet', '•'), + ('mac_franc', '₣'), + ('mac_lira', '₤'), + ('mac_rupee', '₨'), + ('mac_euro', '€'), + ('mac_spade', '♠'), + ('mac_club', '♣'), + ('mac_heart', '♥'), + ('mac_diamond', '♦'), + ('txt_dimension', '×'), ('txt_quote_single_open', '‘'), ('txt_quote_single_close', '’'), ('txt_quote_double_open', '“'), @@ -130,7 +208,6 @@ class Textile(object): ('txt_ellipsis', '…'), ('txt_emdash', '—'), ('txt_endash', '–'), - ('txt_dimension', '×'), ('txt_trademark', '™'), ('txt_registered', '®'), ('txt_copyright', '©'), @@ -593,45 +670,210 @@ class Textile(object): '

Cat’s Cradle by Vonnegut

' """ - # fix: hackish + # fix: hackish text = re.sub(r'"\Z', '\" ', text) glyph_search = ( - re.compile(r"(\w)\'(\w)"), # apostrophe's - re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88 - re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing + re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), # dimension sign + re.compile(r"(\w)\'(\w)"), # apostrophe's + re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88 + re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing re.compile(r'\'/'), # single opening - re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing + re.compile(r'(\")\"'), # double closing - following another + re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing re.compile(r'"'), # double opening re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), # 3+ uppercase acronym re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), # 3+ uppercase - re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis + re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis re.compile(r'(\s?)--(\s?)'), # em dash re.compile(r'\s-(?:\s|$)'), # en dash - re.compile(r'(\d+)( ?)x( ?)(?=\d+)'), # dimension sign - re.compile(r'\b ?[([]TM[])]', re.I), # trademark - re.compile(r'\b ?[([]R[])]', re.I), # registered - re.compile(r'\b ?[([]C[])]', re.I), # copyright + re.compile(r'\b( ?)[([]TM[])]', re.I), # trademark + re.compile(r'\b( ?)[([]R[])]', re.I), # registered + re.compile(r'\b( ?)[([]C[])]', re.I) # copyright ) glyph_replace = [x % dict(self.glyph_defaults) for x in ( - r'\1%(txt_apostrophe)s\2', # apostrophe's - r'\1%(txt_apostrophe)s\2', # back in '88 + r'\1\2%(txt_dimension)s\3', # dimension sign + r'\1%(txt_apostrophe)s\2', # apostrophe's + r'\1%(txt_apostrophe)s\2', # back in '88 r'\1%(txt_quote_single_close)s', # single closing - r'%(txt_quote_single_open)s', # single opening - r'\1%(txt_quote_double_close)s', # double closing - r'%(txt_quote_double_open)s', # double opening + r'%(txt_quote_single_open)s', # single opening + 
r'\1%(txt_quote_double_close)s', # double closing - following another + r'\1%(txt_quote_double_close)s', # double closing + r'%(txt_quote_double_open)s', # double opening r'\1', # 3+ uppercase acronym r'\1', # 3+ uppercase - r'\1%(txt_ellipsis)s', # ellipsis + r'\1%(txt_ellipsis)s', # ellipsis r'\1%(txt_emdash)s\2', # em dash r' %(txt_endash)s ', # en dash - r'\1\2%(txt_dimension)s\3', # dimension sign - r'%(txt_trademark)s', # trademark - r'%(txt_registered)s', # registered - r'%(txt_copyright)s', # copyright + r'\1%(txt_trademark)s', # trademark + r'\1%(txt_registered)s', # registered + r'\1%(txt_copyright)s' # copyright )] + if re.search(r'{.+?}', text): + glyph_search += ( + re.compile(r'{(c\||\|c)}'), # cent + re.compile(r'{(L-|-L)}'), # pound + re.compile(r'{(Y=|=Y)}'), # yen + re.compile(r'{\(c\)}'), # copyright + re.compile(r'{\(r\)}'), # registered + re.compile(r'{1/4}'), # quarter + re.compile(r'{1/2}'), # half + re.compile(r'{3/4}'), # three-quarter + re.compile(r'{(A`|`A)}'), # 192; + re.compile(r'{(A\'|\'A)}'), # 193; + re.compile(r'{(A\^|\^A)}'), # 194; + re.compile(r'{(A~|~A)}'), # 195; + re.compile(r'{(A\"|\"A)}'), # 196; + re.compile(r'{(Ao|oA)}'), # 197; + re.compile(r'{(AE)}'), # 198; + re.compile(r'{(C,|,C)}'), # 199; + re.compile(r'{(E`|`E)}'), # 200; + re.compile(r'{(E\'|\'E)}'), # 201; + re.compile(r'{(E\^|\^E)}'), # 202; + re.compile(r'{(E\"|\"E)}'), # 203; + re.compile(r'{(I`|`I)}'), # 204; + re.compile(r'{(I\'|\'I)}'), # 205; + re.compile(r'{(I\^|\^I)}'), # 206; + re.compile(r'{(I\"|\"I)}'), # 207; + re.compile(r'{(D-|-D)}'), # 208; + re.compile(r'{(N~|~N)}'), # 209; + re.compile(r'{(O`|`O)}'), # 210; + re.compile(r'{(O\'|\'O)}'), # 211; + re.compile(r'{(O\^|\^O)}'), # 212; + re.compile(r'{(O~|~O)}'), # 213; + re.compile(r'{(O\"|\"O)}'), # 214; + re.compile(r'{(O\/|\/O)}'), # 216; + re.compile(r'{(U`|`U)}'), # 217; + re.compile(r'{(U\'|\'U)}'), # 218; + re.compile(r'{(U\^|\^U)}'), # 219; + re.compile(r'{(U\"|\"U)}'), # 220; + 
re.compile(r'{(Y\'|\'Y)}'), # 221; + re.compile(r'{(a`|`a)}'), # a-grave + re.compile(r'{(a\'|\'a)}'), # a-acute + re.compile(r'{(a\^|\^a)}'), # a-circumflex + re.compile(r'{(a~|~a)}'), # a-tilde + re.compile(r'{(a\"|\"a)}'), # a-diaeresis + re.compile(r'{(ao|oa)}'), # a-ring + re.compile(r'{ae}'), # ae + re.compile(r'{(c,|,c)}'), # c-cedilla + re.compile(r'{(e`|`e)}'), # e-grave + re.compile(r'{(e\'|\'e)}'), # e-acute + re.compile(r'{(e\^|\^e)}'), # e-circumflex + re.compile(r'{(e\"|\"e)}'), # e-diaeresis + re.compile(r'{(i`|`i)}'), # i-grave + re.compile(r'{(i\'|\'i)}'), # i-acute + re.compile(r'{(i\^|\^i)}'), # i-circumflex + re.compile(r'{(i\"|\"i)}'), # i-diaeresis + re.compile(r'{(n~|~n)}'), # n-tilde + re.compile(r'{(o`|`o)}'), # o-grave + re.compile(r'{(o\'|\'o)}'), # o-acute + re.compile(r'{(o\^|\^o)}'), # o-circumflex + re.compile(r'{(o~|~o)}'), # o-tilde + re.compile(r'{(o\"|\"o)}'), # o-diaeresis + re.compile(r'{(o\/|\/o)}'), # o-stroke + re.compile(r'{(u`|`u)}'), # u-grave + re.compile(r'{(u\'|\'u)}'), # u-acute + re.compile(r'{(u\^|\^u)}'), # u-circumflex + re.compile(r'{(u\"|\"u)}'), # u-diaeresis + re.compile(r'{(y\'|\'y)}'), # y-acute + re.compile(r'{(y\"|\"y)}'), # y-diaeresis + re.compile(r'{OE}'), # OE ligature + re.compile(r'{oe}'), # oe ligature + re.compile(r'{\*}'), # bullet + re.compile(r'{Fr}'), # Franc + re.compile(r'{(L=|=L)}'), # Lira + re.compile(r'{Rs}'), # Rupee + re.compile(r'{(C=|=C)}'), # euro + re.compile(r'{tm}'), # trademark + re.compile(r'{spade}'), # spade + re.compile(r'{club}'), # club + re.compile(r'{heart}'), # heart + re.compile(r'{diamond}') # diamond + ) + + glyph_replace += [x % dict(self.glyph_defaults) for x in ( + r'%(mac_cent)s', # cent + r'%(mac_pound)s', # pound + r'%(mac_yen)s', # yen + r'%(txt_copyright)s', # copyright + r'%(txt_registered)s', # registered + r'%(mac_quarter)s', # quarter + r'%(mac_half)s', # half + r'%(mac_three-quarter)s', # three-quarter + r'%(mac_cA-grave)s', # 192; + r'%(mac_cA-acute)s', # 193; 
+ r'%(mac_cA-circumflex)s', # 194; + r'%(mac_cA-tilde)s', # 195; + r'%(mac_cA-diaeresis)s', # 196; + r'%(mac_cA-ring)s', # 197; + r'%(mac_cAE)s', # 198; + r'%(mac_cC-cedilla)s', # 199; + r'%(mac_cE-grave)s', # 200; + r'%(mac_cE-acute)s', # 201; + r'%(mac_cE-circumflex)s', # 202; + r'%(mac_cE-diaeresis)s', # 203; + r'%(mac_cI-grave)s', # 204; + r'%(mac_cI-acute)s', # 205; + r'%(mac_cI-circumflex)s', # 206; + r'%(mac_cI-diaeresis)s', # 207; + r'%(mac_cEth)s', # 208; + r'%(mac_cN-tilde)s', # 209; + r'%(mac_cO-grave)s', # 210; + r'%(mac_cO-acute)s', # 211; + r'%(mac_cO-circumflex)s', # 212; + r'%(mac_cO-tilde)s', # 213; + r'%(mac_cO-diaeresis)s', # 214; + r'%(mac_cO-stroke)s', # 216; + r'%(mac_cU-grave)s', # 217; + r'%(mac_cU-acute)s', # 218; + r'%(mac_cU-circumflex)s', # 219; + r'%(mac_cU-diaeresis)s', # 220; + r'%(mac_cY-acute)s', # 221; + r'%(mac_sa-grave)s', # 224; + r'%(mac_sa-acute)s', # 225; + r'%(mac_sa-circumflex)s', # 226; + r'%(mac_sa-tilde)s', # 227; + r'%(mac_sa-diaeresis)s', # 228; + r'%(mac_sa-ring)s', # 229; + r'%(mac_sae)s', # 230; + r'%(mac_sc-cedilla)s', # 231; + r'%(mac_se-grave)s', # 232; + r'%(mac_se-acute)s', # 233; + r'%(mac_se-circumflex)s', # 234; + r'%(mac_se-diaeresis)s', # 235; + r'%(mac_si-grave)s', # 236; + r'%(mac_si-acute)s', # 237; + r'%(mac_si-circumflex)s', # 238; + r'%(mac_si-diaeresis)s', # 239; + r'%(mac_sn-tilde)s', # 241; + r'%(mac_so-grave)s', # 242; + r'%(mac_so-acute)s', # 243; + r'%(mac_so-circumflex)s', # 244; + r'%(mac_so-tilde)s', # 245; + r'%(mac_so-diaeresis)s', # 246; + r'%(mac_so-stroke)s', # 248; + r'%(mac_su-grave)s', # 249; + r'%(mac_su-acute)s', # 250; + r'%(mac_su-circumflex)s', # 251; + r'%(mac_su-diaeresis)s', # 252; + r'%(mac_sy-acute)s', # 253; + r'%(mac_sy-diaeresis)s', # 255; + r'%(mac_cOE)s', # 338; + r'%(mac_soe)s', # 339; + r'%(mac_bullet)s', # bullet + r'%(mac_franc)s', # franc + r'%(mac_lira)s', # lira + r'%(mac_rupee)s', # rupee + r'%(mac_euro)s', # euro + r'%(txt_trademark)s', # trademark + 
r'%(mac_spade)s', # spade + r'%(mac_club)s', # club + r'%(mac_heart)s', # heart + r'%(mac_diamond)s' # diamond + )] + result = [] for line in re.compile(r'(<.*?>)', re.U).split(text): if not re.search(r'<.*>', line): From 6dc2c147afd4bbd54cd28af5b39528a4da25c2c1 Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 17 Mar 2011 21:17:00 -0400 Subject: [PATCH 2/2] TXT Input: Textile: Make the header show that the file has diverged from upstream. --- src/calibre/ebooks/textile/functions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py index b46384ae93..891211de30 100755 --- a/src/calibre/ebooks/textile/functions.py +++ b/src/calibre/ebooks/textile/functions.py @@ -5,9 +5,9 @@ PyTextile A Humane Web Text Generator """ -__version__ = '2.1.4' - -__date__ = '2009/12/04' +# Last upstream version basis +# __version__ = '2.1.4' +#__date__ = '2009/12/04' __copyright__ = """ Copyright (c) 2011, Leigh Parry