mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
...
This commit is contained in:
commit
0523376c25
264
src/calibre/ebooks/textile/functions.py
Normal file → Executable file
264
src/calibre/ebooks/textile/functions.py
Normal file → Executable file
@ -5,11 +5,13 @@ PyTextile
|
||||
A Humane Web Text Generator
|
||||
"""
|
||||
|
||||
__version__ = '2.1.4'
|
||||
|
||||
__date__ = '2009/12/04'
|
||||
# Last upstream version basis
|
||||
# __version__ = '2.1.4'
|
||||
#__date__ = '2009/12/04'
|
||||
|
||||
__copyright__ = """
|
||||
Copyright (c) 2011, Leigh Parry
|
||||
Copyright (c) 2011, John Schember <john@nachtimwald.com>
|
||||
Copyright (c) 2009, Jason Samsa, http://jsamsa.com/
|
||||
Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
|
||||
Copyright (c) 2003, Mark Pilgrim, http://diveintomark.org/
|
||||
@ -120,6 +122,82 @@ class Textile(object):
|
||||
btag_lite = ('bq', 'bc', 'p')
|
||||
|
||||
glyph_defaults = (
|
||||
('mac_cent', '¢'),
|
||||
('mac_pound', '£'),
|
||||
('mac_yen', '¥'),
|
||||
('mac_quarter', '¼'),
|
||||
('mac_half', '½'),
|
||||
('mac_three-quarter', '¾'),
|
||||
('mac_cA-grave', 'À'),
|
||||
('mac_cA-acute', 'Á'),
|
||||
('mac_cA-circumflex', 'Â'),
|
||||
('mac_cA-tilde', 'Ã'),
|
||||
('mac_cA-diaeresis', 'Ä'),
|
||||
('mac_cA-ring', 'Å'),
|
||||
('mac_cAE', 'Æ'),
|
||||
('mac_cC-cedilla', 'Ç'),
|
||||
('mac_cE-grave', 'È'),
|
||||
('mac_cE-acute', 'É'),
|
||||
('mac_cE-circumflex', 'Ê'),
|
||||
('mac_cE-diaeresis', 'Ë'),
|
||||
('mac_cI-grave', 'Ì'),
|
||||
('mac_cI-acute', 'Í'),
|
||||
('mac_cI-circumflex', 'Î'),
|
||||
('mac_cI-diaeresis', 'Ï'),
|
||||
('mac_cEth', 'Ð'),
|
||||
('mac_cN-tilde', 'Ñ'),
|
||||
('mac_cO-grave', 'Ò'),
|
||||
('mac_cO-acute', 'Ó'),
|
||||
('mac_cO-circumflex', 'Ô'),
|
||||
('mac_cO-tilde', 'Õ'),
|
||||
('mac_cO-diaeresis', 'Ö'),
|
||||
('mac_cO-stroke', 'Ø'),
|
||||
('mac_cU-grave', 'Ù'),
|
||||
('mac_cU-acute', 'Ú'),
|
||||
('mac_cU-circumflex', 'Û'),
|
||||
('mac_cU-diaeresis', 'Ü'),
|
||||
('mac_cY-acute', 'Ý'),
|
||||
('mac_sa-grave', 'à'),
|
||||
('mac_sa-acute', 'á'),
|
||||
('mac_sa-circumflex', 'â'),
|
||||
('mac_sa-tilde', 'ã'),
|
||||
('mac_sa-diaeresis', 'ä'),
|
||||
('mac_sa-ring', 'å'),
|
||||
('mac_sae', 'æ'),
|
||||
('mac_sc-cedilla', 'ç'),
|
||||
('mac_se-grave', 'è'),
|
||||
('mac_se-acute', 'é'),
|
||||
('mac_se-circumflex', 'ê'),
|
||||
('mac_se-diaeresis', 'ë'),
|
||||
('mac_si-grave', 'ì'),
|
||||
('mac_si-acute', 'í'),
|
||||
('mac_si-circumflex', 'î'),
|
||||
('mac_si-diaeresis', 'ï'),
|
||||
('mac_sn-tilde', 'ñ'),
|
||||
('mac_so-grave', 'ò'),
|
||||
('mac_so-acute', 'ó'),
|
||||
('mac_so-circumflex', 'ô'),
|
||||
('mac_so-tilde', 'õ'),
|
||||
('mac_so-diaeresis', 'ö'),
|
||||
('mac_so-stroke', 'ø'),
|
||||
('mac_su-grave', 'ù'),
|
||||
('mac_su-acute', 'ú'),
|
||||
('mac_su-circumflex', 'û'),
|
||||
('mac_su-diaeresis', 'ü'),
|
||||
('mac_sy-acute', 'ý'),
|
||||
('mac_sy-diaeresis', 'ÿ'),
|
||||
('mac_cOE', 'Œ'),
|
||||
('mac_soe', 'œ'),
|
||||
('mac_bullet', '•'),
|
||||
('mac_franc', '₣'),
|
||||
('mac_lira', '₤'),
|
||||
('mac_rupee', '₨'),
|
||||
('mac_euro', '€'),
|
||||
('mac_spade', '♠'),
|
||||
('mac_club', '♣'),
|
||||
('mac_heart', '♥'),
|
||||
('mac_diamond', '♦'),
|
||||
('txt_dimension', '×'),
|
||||
('txt_quote_single_open', '‘'),
|
||||
('txt_quote_single_close', '’'),
|
||||
('txt_quote_double_open', '“'),
|
||||
@ -130,7 +208,6 @@ class Textile(object):
|
||||
('txt_ellipsis', '…'),
|
||||
('txt_emdash', '—'),
|
||||
('txt_endash', '–'),
|
||||
('txt_dimension', '×'),
|
||||
('txt_trademark', '™'),
|
||||
('txt_registered', '®'),
|
||||
('txt_copyright', '©'),
|
||||
@ -597,10 +674,12 @@ class Textile(object):
|
||||
text = re.sub(r'"\Z', '\" ', text)
|
||||
|
||||
glyph_search = (
|
||||
re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), # dimension sign
|
||||
re.compile(r"(\w)\'(\w)"), # apostrophe's
|
||||
re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88
|
||||
re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing
|
||||
re.compile(r'\'/'), # single opening
|
||||
re.compile(r'(\")\"'), # double closing - following another
|
||||
re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing
|
||||
re.compile(r'"'), # double opening
|
||||
re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), # 3+ uppercase acronym
|
||||
@ -608,17 +687,18 @@ class Textile(object):
|
||||
re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis
|
||||
re.compile(r'(\s?)--(\s?)'), # em dash
|
||||
re.compile(r'\s-(?:\s|$)'), # en dash
|
||||
re.compile(r'(\d+)( ?)x( ?)(?=\d+)'), # dimension sign
|
||||
re.compile(r'\b ?[([]TM[])]', re.I), # trademark
|
||||
re.compile(r'\b ?[([]R[])]', re.I), # registered
|
||||
re.compile(r'\b ?[([]C[])]', re.I), # copyright
|
||||
re.compile(r'\b( ?)[([]TM[])]', re.I), # trademark
|
||||
re.compile(r'\b( ?)[([]R[])]', re.I), # registered
|
||||
re.compile(r'\b( ?)[([]C[])]', re.I) # copyright
|
||||
)
|
||||
|
||||
glyph_replace = [x % dict(self.glyph_defaults) for x in (
|
||||
r'\1\2%(txt_dimension)s\3', # dimension sign
|
||||
r'\1%(txt_apostrophe)s\2', # apostrophe's
|
||||
r'\1%(txt_apostrophe)s\2', # back in '88
|
||||
r'\1%(txt_quote_single_close)s', # single closing
|
||||
r'%(txt_quote_single_open)s', # single opening
|
||||
r'\1%(txt_quote_double_close)s', # double closing - following another
|
||||
r'\1%(txt_quote_double_close)s', # double closing
|
||||
r'%(txt_quote_double_open)s', # double opening
|
||||
r'<acronym title="\2">\1</acronym>', # 3+ uppercase acronym
|
||||
@ -626,10 +706,172 @@ class Textile(object):
|
||||
r'\1%(txt_ellipsis)s', # ellipsis
|
||||
r'\1%(txt_emdash)s\2', # em dash
|
||||
r' %(txt_endash)s ', # en dash
|
||||
r'\1\2%(txt_dimension)s\3', # dimension sign
|
||||
r'%(txt_trademark)s', # trademark
|
||||
r'%(txt_registered)s', # registered
|
||||
r'\1%(txt_trademark)s', # trademark
|
||||
r'\1%(txt_registered)s', # registered
|
||||
r'\1%(txt_copyright)s' # copyright
|
||||
)]
|
||||
|
||||
if re.search(r'{.+?}', text):
|
||||
glyph_search += (
|
||||
re.compile(r'{(c\||\|c)}'), # cent
|
||||
re.compile(r'{(L-|-L)}'), # pound
|
||||
re.compile(r'{(Y=|=Y)}'), # yen
|
||||
re.compile(r'{\(c\)}'), # copyright
|
||||
re.compile(r'{\(r\)}'), # registered
|
||||
re.compile(r'{1/4}'), # quarter
|
||||
re.compile(r'{1/2}'), # half
|
||||
re.compile(r'{3/4}'), # three-quarter
|
||||
re.compile(r'{(A`|`A)}'), # 192;
|
||||
re.compile(r'{(A\'|\'A)}'), # 193;
|
||||
re.compile(r'{(A\^|\^A)}'), # 194;
|
||||
re.compile(r'{(A~|~A)}'), # 195;
|
||||
re.compile(r'{(A\"|\"A)}'), # 196;
|
||||
re.compile(r'{(Ao|oA)}'), # 197;
|
||||
re.compile(r'{(AE)}'), # 198;
|
||||
re.compile(r'{(C,|,C)}'), # 199;
|
||||
re.compile(r'{(E`|`E)}'), # 200;
|
||||
re.compile(r'{(E\'|\'E)}'), # 201;
|
||||
re.compile(r'{(E\^|\^E)}'), # 202;
|
||||
re.compile(r'{(E\"|\"E)}'), # 203;
|
||||
re.compile(r'{(I`|`I)}'), # 204;
|
||||
re.compile(r'{(I\'|\'I)}'), # 205;
|
||||
re.compile(r'{(I\^|\^I)}'), # 206;
|
||||
re.compile(r'{(I\"|\"I)}'), # 207;
|
||||
re.compile(r'{(D-|-D)}'), # 208;
|
||||
re.compile(r'{(N~|~N)}'), # 209;
|
||||
re.compile(r'{(O`|`O)}'), # 210;
|
||||
re.compile(r'{(O\'|\'O)}'), # 211;
|
||||
re.compile(r'{(O\^|\^O)}'), # 212;
|
||||
re.compile(r'{(O~|~O)}'), # 213;
|
||||
re.compile(r'{(O\"|\"O)}'), # 214;
|
||||
re.compile(r'{(O\/|\/O)}'), # 216;
|
||||
re.compile(r'{(U`|`U)}'), # 217;
|
||||
re.compile(r'{(U\'|\'U)}'), # 218;
|
||||
re.compile(r'{(U\^|\^U)}'), # 219;
|
||||
re.compile(r'{(U\"|\"U)}'), # 220;
|
||||
re.compile(r'{(Y\'|\'Y)}'), # 221;
|
||||
re.compile(r'{(a`|`a)}'), # a-grave
|
||||
re.compile(r'{(a\'|\'a)}'), # a-acute
|
||||
re.compile(r'{(a\^|\^a)}'), # a-circumflex
|
||||
re.compile(r'{(a~|~a)}'), # a-tilde
|
||||
re.compile(r'{(a\"|\"a)}'), # a-diaeresis
|
||||
re.compile(r'{(ao|oa)}'), # a-ring
|
||||
re.compile(r'{ae}'), # ae
|
||||
re.compile(r'{(c,|,c)}'), # c-cedilla
|
||||
re.compile(r'{(e`|`e)}'), # e-grave
|
||||
re.compile(r'{(e\'|\'e)}'), # e-acute
|
||||
re.compile(r'{(e\^|\^e)}'), # e-circumflex
|
||||
re.compile(r'{(e\"|\"e)}'), # e-diaeresis
|
||||
re.compile(r'{(i`|`i)}'), # i-grave
|
||||
re.compile(r'{(i\'|\'i)}'), # i-acute
|
||||
re.compile(r'{(i\^|\^i)}'), # i-circumflex
|
||||
re.compile(r'{(i\"|\"i)}'), # i-diaeresis
|
||||
re.compile(r'{(n~|~n)}'), # n-tilde
|
||||
re.compile(r'{(o`|`o)}'), # o-grave
|
||||
re.compile(r'{(o\'|\'o)}'), # o-acute
|
||||
re.compile(r'{(o\^|\^o)}'), # o-circumflex
|
||||
re.compile(r'{(o~|~o)}'), # o-tilde
|
||||
re.compile(r'{(o\"|\"o)}'), # o-diaeresis
|
||||
re.compile(r'{(o\/|\/o)}'), # o-stroke
|
||||
re.compile(r'{(u`|`u)}'), # u-grave
|
||||
re.compile(r'{(u\'|\'u)}'), # u-acute
|
||||
re.compile(r'{(u\^|\^u)}'), # u-circumflex
|
||||
re.compile(r'{(u\"|\"u)}'), # u-diaeresis
|
||||
re.compile(r'{(y\'|\'y)}'), # y-acute
|
||||
re.compile(r'{(y\"|\"y)}'), # y-diaeresis
|
||||
re.compile(r'{OE}'), # OE ligature
|
||||
re.compile(r'{oe}'), # oe ligature
|
||||
re.compile(r'{\*}'), # bullet
|
||||
re.compile(r'{Fr}'), # Franc
|
||||
re.compile(r'{(L=|=L)}'), # Lira
|
||||
re.compile(r'{Rs}'), # Rupee
|
||||
re.compile(r'{(C=|=C)}'), # euro
|
||||
re.compile(r'{tm}'), # trademark
|
||||
re.compile(r'{spade}'), # spade
|
||||
re.compile(r'{club}'), # club
|
||||
re.compile(r'{heart}'), # heart
|
||||
re.compile(r'{diamond}') # diamond
|
||||
)
|
||||
|
||||
glyph_replace += [x % dict(self.glyph_defaults) for x in (
|
||||
r'%(mac_cent)s', # cent
|
||||
r'%(mac_pound)s', # pound
|
||||
r'%(mac_yen)s', # yen
|
||||
r'%(txt_copyright)s', # copyright
|
||||
r'%(txt_registered)s', # registered
|
||||
r'%(mac_quarter)s', # quarter
|
||||
r'%(mac_half)s', # half
|
||||
r'%(mac_three-quarter)s', # three-quarter
|
||||
r'%(mac_cA-grave)s', # 192;
|
||||
r'%(mac_cA-acute)s', # 193;
|
||||
r'%(mac_cA-circumflex)s', # 194;
|
||||
r'%(mac_cA-tilde)s', # 195;
|
||||
r'%(mac_cA-diaeresis)s', # 196;
|
||||
r'%(mac_cA-ring)s', # 197;
|
||||
r'%(mac_cAE)s', # 198;
|
||||
r'%(mac_cC-cedilla)s', # 199;
|
||||
r'%(mac_cE-grave)s', # 200;
|
||||
r'%(mac_cE-acute)s', # 201;
|
||||
r'%(mac_cE-circumflex)s', # 202;
|
||||
r'%(mac_cE-diaeresis)s', # 203;
|
||||
r'%(mac_cI-grave)s', # 204;
|
||||
r'%(mac_cI-acute)s', # 205;
|
||||
r'%(mac_cI-circumflex)s', # 206;
|
||||
r'%(mac_cI-diaeresis)s', # 207;
|
||||
r'%(mac_cEth)s', # 208;
|
||||
r'%(mac_cN-tilde)s', # 209;
|
||||
r'%(mac_cO-grave)s', # 210;
|
||||
r'%(mac_cO-acute)s', # 211;
|
||||
r'%(mac_cO-circumflex)s', # 212;
|
||||
r'%(mac_cO-tilde)s', # 213;
|
||||
r'%(mac_cO-diaeresis)s', # 214;
|
||||
r'%(mac_cO-stroke)s', # 216;
|
||||
r'%(mac_cU-grave)s', # 217;
|
||||
r'%(mac_cU-acute)s', # 218;
|
||||
r'%(mac_cU-circumflex)s', # 219;
|
||||
r'%(mac_cU-diaeresis)s', # 220;
|
||||
r'%(mac_cY-acute)s', # 221;
|
||||
r'%(mac_sa-grave)s', # 224;
|
||||
r'%(mac_sa-acute)s', # 225;
|
||||
r'%(mac_sa-circumflex)s', # 226;
|
||||
r'%(mac_sa-tilde)s', # 227;
|
||||
r'%(mac_sa-diaeresis)s', # 228;
|
||||
r'%(mac_sa-ring)s', # 229;
|
||||
r'%(mac_sae)s', # 230;
|
||||
r'%(mac_sc-cedilla)s', # 231;
|
||||
r'%(mac_se-grave)s', # 232;
|
||||
r'%(mac_se-acute)s', # 233;
|
||||
r'%(mac_se-circumflex)s', # 234;
|
||||
r'%(mac_se-diaeresis)s', # 235;
|
||||
r'%(mac_si-grave)s', # 236;
|
||||
r'%(mac_si-acute)s', # 237;
|
||||
r'%(mac_si-circumflex)s', # 238;
|
||||
r'%(mac_si-diaeresis)s', # 239;
|
||||
r'%(mac_sn-tilde)s', # 241;
|
||||
r'%(mac_so-grave)s', # 242;
|
||||
r'%(mac_so-acute)s', # 243;
|
||||
r'%(mac_so-circumflex)s', # 244;
|
||||
r'%(mac_so-tilde)s', # 245;
|
||||
r'%(mac_so-diaeresis)s', # 246;
|
||||
r'%(mac_so-stroke)s', # 248;
|
||||
r'%(mac_su-grave)s', # 249;
|
||||
r'%(mac_su-acute)s', # 250;
|
||||
r'%(mac_su-circumflex)s', # 251;
|
||||
r'%(mac_su-diaeresis)s', # 252;
|
||||
r'%(mac_sy-acute)s', # 253;
|
||||
r'%(mac_sy-diaeresis)s', # 255;
|
||||
r'%(mac_cOE)s', # 338;
|
||||
r'%(mac_soe)s', # 339;
|
||||
r'%(mac_bullet)s', # bullet
|
||||
r'%(mac_franc)s', # franc
|
||||
r'%(mac_lira)s', # lira
|
||||
r'%(mac_rupee)s', # rupee
|
||||
r'%(mac_euro)s', # euro
|
||||
r'%(txt_trademark)s', # trademark
|
||||
r'%(mac_spade)s', # spade
|
||||
r'%(mac_club)s', # club
|
||||
r'%(mac_heart)s', # heart
|
||||
r'%(mac_diamond)s' # diamond
|
||||
)]
|
||||
|
||||
result = []
|
||||
|
Loading…
x
Reference in New Issue
Block a user