mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #822744 (Unwrap problem - some central european chars missing)
This commit is contained in:
commit
ca9048cdae
@ -343,6 +343,7 @@ class HTMLPreProcessor(object):
|
||||
(re.compile(u'`\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ò'),
|
||||
(re.compile(u'`\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ù'),
|
||||
(re.compile(u'`\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ù'),
|
||||
|
||||
# ` with letter before
|
||||
(re.compile(u'a\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'à'),
|
||||
(re.compile(u'A\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'À'),
|
||||
@ -364,10 +365,14 @@ class HTMLPreProcessor(object):
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'É'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'í'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Í'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*l', re.UNICODE), lambda match: u'ĺ'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*L', re.UNICODE), lambda match: u'Ĺ'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*o', re.UNICODE), lambda match: u'ó'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ó'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*n', re.UNICODE), lambda match: u'ń'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*N', re.UNICODE), lambda match: u'Ń'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*r', re.UNICODE), lambda match: u'ŕ'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*R', re.UNICODE), lambda match: u'Ŕ'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*s', re.UNICODE), lambda match: u'ś'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*S', re.UNICODE), lambda match: u'Ś'),
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ú'),
|
||||
@ -400,7 +405,31 @@ class HTMLPreProcessor(object):
|
||||
# ˙
|
||||
(re.compile(u'˙\s*(<br.*?>)*\s*z', re.UNICODE), lambda match: u'ż'),
|
||||
(re.compile(u'˙\s*(<br.*?>)*\s*Z', re.UNICODE), lambda match: u'Ż'),
|
||||
|
||||
|
||||
# ˇ
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*c', re.UNICODE), lambda match: u'č'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*C', re.UNICODE), lambda match: u'Č'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*d', re.UNICODE), lambda match: u'ď'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*D', re.UNICODE), lambda match: u'Ď'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'ě'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'Ě'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*l', re.UNICODE), lambda match: u'ľ'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*L', re.UNICODE), lambda match: u'Ľ'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*n', re.UNICODE), lambda match: u'ň'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*N', re.UNICODE), lambda match: u'Ň'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*r', re.UNICODE), lambda match: u'ř'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*R', re.UNICODE), lambda match: u'Ř'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*s', re.UNICODE), lambda match: u'š'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*S', re.UNICODE), lambda match: u'Š'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*t', re.UNICODE), lambda match: u'ť'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*T', re.UNICODE), lambda match: u'Ť'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*z', re.UNICODE), lambda match: u'ž'),
|
||||
(re.compile(u'ˇ\s*(<br.*?>)*\s*Z', re.UNICODE), lambda match: u'Ž'),
|
||||
|
||||
# °
|
||||
(re.compile(u'°\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ů'),
|
||||
(re.compile(u'°\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ů'),
|
||||
|
||||
# If pdf printed from a browser then the header/footer has a reliable pattern
|
||||
(re.compile(r'((?<=</a>)\s*file:/{2,4}[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),
|
||||
|
||||
@ -510,7 +539,7 @@ class HTMLPreProcessor(object):
|
||||
end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
|
||||
end_rules.append(
|
||||
# Unwrap using punctuation
|
||||
(re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðßě,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
|
||||
(re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:“”)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
|
||||
)
|
||||
|
||||
for rule in self.PREPROCESS + start_rules:
|
||||
|
@ -315,9 +315,11 @@ class HeuristicProcessor(object):
|
||||
supports a range of html markup and text files
|
||||
'''
|
||||
# define the pieces of the regex
|
||||
lookahead = "(?<=.{"+str(length)+u"}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðßôľščťžňďěřů,:)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity
|
||||
|
||||
lookahead = "(?<=.{"+str(length)+u"}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:“”)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity
|
||||
em_en_lookahead = "(?<=.{"+str(length)+u"}[\u2013\u2014])"
|
||||
soft_hyphen = u"\xad"
|
||||
dash = u"\x2d" # some OCRs don't convert dashes to hyphens
|
||||
line_ending = "\s*</(span|[iubp]|div)>\s*(</(span|[iubp]|div)>)?"
|
||||
blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
|
||||
line_opening = "<(span|[iubp]|div)[^>]*>\s*(<(span|[iubp]|div)[^>]*>)?\s*"
|
||||
@ -326,19 +328,23 @@ class HeuristicProcessor(object):
|
||||
unwrap_regex = lookahead+line_ending+blanklines+line_opening
|
||||
em_en_unwrap_regex = em_en_lookahead+line_ending+blanklines+line_opening
|
||||
shy_unwrap_regex = soft_hyphen+line_ending+blanklines+line_opening
|
||||
dash_unwrap_regex = dash+line_ending+blanklines+line_opening
|
||||
|
||||
if format == 'txt':
|
||||
unwrap_regex = lookahead+txt_line_wrap
|
||||
em_en_unwrap_regex = em_en_lookahead+txt_line_wrap
|
||||
shy_unwrap_regex = soft_hyphen+txt_line_wrap
|
||||
dash_unwrap_regex = dash+txt_line_wrap
|
||||
|
||||
unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE)
|
||||
em_en_unwrap = re.compile(u"%s" % em_en_unwrap_regex, re.UNICODE)
|
||||
shy_unwrap = re.compile(u"%s" % shy_unwrap_regex, re.UNICODE)
|
||||
dash_unwrap = re.compile(u"%s" % dash_unwrap_regex, re.UNICODE)
|
||||
|
||||
content = unwrap.sub(' ', content)
|
||||
content = em_en_unwrap.sub('', content)
|
||||
content = shy_unwrap.sub('', content)
|
||||
content = dash_unwrap.sub('', content)
|
||||
return content
|
||||
|
||||
def txt_process(self, match):
|
||||
|
@ -196,10 +196,40 @@ class Textile(object):
|
||||
(re.compile(r'{(u\"|\"u)}'), r'ü'), # u-diaeresis
|
||||
(re.compile(r'{(y\'|\'y)}'), r'ý'), # y-acute
|
||||
(re.compile(r'{(y\"|\"y)}'), r'ÿ'), # y-diaeresis
|
||||
|
||||
(re.compile(r'{(C\ˇ|\ˇC)}'), r'Č'), # C-caron
|
||||
(re.compile(r'{(c\ˇ|\ˇc)}'), r'č'), # c-caron
|
||||
(re.compile(r'{(D\ˇ|\ˇD)}'), r'Ď'), # D-caron
|
||||
(re.compile(r'{(d\ˇ|\ˇd)}'), r'ď'), # d-caron
|
||||
(re.compile(r'{(E\ˇ|\ˇE)}'), r'Ě'), # E-caron
|
||||
(re.compile(r'{(e\ˇ|\ˇe)}'), r'ě'), # e-caron
|
||||
(re.compile(r'{(L\'|\'L)}'), r'Ĺ'), # L-acute
|
||||
(re.compile(r'{(l\'|\'l)}'), r'ĺ'), # l-acute
|
||||
(re.compile(r'{(L\ˇ|\ˇL)}'), r'Ľ'), # L-caron
|
||||
(re.compile(r'{(l\ˇ|\ˇl)}'), r'ľ'), # l-caron
|
||||
(re.compile(r'{(N\ˇ|\ˇN)}'), r'Ň'), # N-caron
|
||||
(re.compile(r'{(n\ˇ|\ˇn)}'), r'ň'), # n-caron
|
||||
|
||||
(re.compile(r'{OE}'), r'Œ'), # OE
|
||||
(re.compile(r'{oe}'), r'œ'), # oe
|
||||
(re.compile(r'{(S\^|\^S)}'), r'Š'), # Scaron
|
||||
(re.compile(r'{(s\^|\^s)}'), r'š'), # scaron
|
||||
|
||||
(re.compile(r'{(R\'|\'R)}'), r'Ŕ'), # R-acute
|
||||
(re.compile(r'{(r\'|\'r)}'), r'ŕ'), # r-acute
|
||||
(re.compile(r'{(R\ˇ|\ˇR)}'), r'Ř'), # R-caron
|
||||
(re.compile(r'{(r\ˇ|\ˇr)}'), r'ř'), # r-caron
|
||||
|
||||
(re.compile(r'{(S\^|\^S)}'), r'Ŝ'), # S-circumflex
|
||||
(re.compile(r'{(s\^|\^s)}'), r'ŝ'), # s-circumflex
|
||||
|
||||
(re.compile(r'{(S\ˇ|\ˇS)}'), r'Š'), # S-caron
|
||||
(re.compile(r'{(s\ˇ|\ˇs)}'), r'š'), # s-caron
|
||||
(re.compile(r'{(T\ˇ|\ˇT)}'), r'Ť'), # T-caron
|
||||
(re.compile(r'{(t\ˇ|\ˇt)}'), r'ť'), # t-caron
|
||||
(re.compile(r'{(U\°|\°U)}'), r'Ů'), # U-ring
|
||||
(re.compile(r'{(u\°|\°u)}'), r'ů'), # u-ring
|
||||
(re.compile(r'{(Z\ˇ|\ˇZ)}'), r'Ž'), # Z-caron
|
||||
(re.compile(r'{(z\ˇ|\ˇz)}'), r'ž'), # z-caron
|
||||
|
||||
(re.compile(r'{\*}'), r'•'), # bullet
|
||||
(re.compile(r'{Fr}'), r'₣'), # Franc
|
||||
(re.compile(r'{(L=|=L)}'), r'₤'), # Lira
|
||||
@ -219,13 +249,13 @@ class Textile(object):
|
||||
]
|
||||
glyph_defaults = [
|
||||
(re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2×\3'), # dimension sign
|
||||
(re.compile(r'(\d+)\'(\s)', re.I), r'\1′\2'), # prime
|
||||
(re.compile(r'(\d+)\"(\s)', re.I), r'\1″\2'), # prime-double
|
||||
(re.compile(r'(\d+)\'(\s)', re.I), r'\1′\2'), # prime
|
||||
(re.compile(r'(\d+)\"(\s)', re.I), r'\1″\2'), # prime-double
|
||||
(re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
|
||||
(re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
|
||||
(re.compile(r'\b(\s{0,1})?\.{3}'), r'\1…'), # ellipsis
|
||||
(re.compile(r'^[\*_-]{3,}$', re.M), r'<hr />'), # <hr> scene-break
|
||||
(re.compile(r'(^|[^-])--([^-]|$)'), r'\1—\2'), # em dash
|
||||
(re.compile(r'(^|[^-])--([^-]|$)'), r'\1—\2'), # em dash
|
||||
(re.compile(r'\s-(?:\s|$)'), r' – '), # en dash
|
||||
(re.compile(r'\b( ?)[([]TM[])]', re.I), r'\1™'), # trademark
|
||||
(re.compile(r'\b( ?)[([]R[])]', re.I), r'\1®'), # registered
|
||||
|
@ -26,7 +26,7 @@ def unsmarten(txt):
|
||||
txt = re.sub(u'¾|¾|¾', r'{3/4}', txt) # three-quarter
|
||||
txt = re.sub(u'À|À|À', r'{A`)}', txt) # A-grave
|
||||
txt = re.sub(u'Á|Á|Á', r"{A'}", txt) # A-acute
|
||||
txt = re.sub(u'Â|Â|Â', r'{A^}', txt) # A-circumflex
|
||||
txt = re.sub(u'Â|Â|Â', r'{A^}', txt) # A-circumflex
|
||||
txt = re.sub(u'Ã|Ã|Ã', r'{A~}', txt) # A-tilde
|
||||
txt = re.sub(u'Ä|Ä|Ä', r'{A"}', txt) # A-umlaut
|
||||
txt = re.sub(u'Å|Å|Å', r'{Ao}', txt) # A-ring
|
||||
@ -34,30 +34,30 @@ def unsmarten(txt):
|
||||
txt = re.sub(u'Ç|Ç|Ç', r'{C,}', txt) # C-cedilla
|
||||
txt = re.sub(u'È|È|È', r'{E`}', txt) # E-grave
|
||||
txt = re.sub(u'É|É|É', r"{E'}", txt) # E-acute
|
||||
txt = re.sub(u'Ê|Ê|Ê', r'{E^}', txt) # E-circumflex
|
||||
txt = re.sub(u'Ê|Ê|Ê', r'{E^}', txt) # E-circumflex
|
||||
txt = re.sub(u'Ë|Ë|Ë', r'{E"}', txt) # E-umlaut
|
||||
txt = re.sub(u'Ì|Ì|Ì', r'{I`}', txt) # I-grave
|
||||
txt = re.sub(u'Í|Í|Í', r"{I'}", txt) # I-acute
|
||||
txt = re.sub(u'Î|Î|Î', r'{I^}', txt) # I-circumflex
|
||||
txt = re.sub(u'Î|Î|Î', r'{I^}', txt) # I-circumflex
|
||||
txt = re.sub(u'Ï|Ï|Ï', r'{I"}', txt) # I-umlaut
|
||||
txt = re.sub(u'Ð|Ð|Ð', r'{D-}', txt) # ETH
|
||||
txt = re.sub(u'Ñ|Ñ|Ñ', r'{N~}', txt) # N-tilde
|
||||
txt = re.sub(u'Ò|Ò|Ò', r'{O`}', txt) # O-grave
|
||||
txt = re.sub(u'Ó|Ó|Ó', r"{O'}", txt) # O-acute
|
||||
txt = re.sub(u'Ô|Ô|Ô', r'{O^}', txt) # O-circumflex
|
||||
txt = re.sub(u'Ô|Ô|Ô', r'{O^}', txt) # O-circumflex
|
||||
txt = re.sub(u'Õ|Õ|Õ', r'{O~}', txt) # O-tilde
|
||||
txt = re.sub(u'Ö|Ö|Ö', r'{O"}', txt) # O-umlaut
|
||||
txt = re.sub(u'×|×|×', r'{x}', txt) # dimension
|
||||
txt = re.sub(u'Ø|Ø|Ø', r'{O/}', txt) # O-slash
|
||||
txt = re.sub(u'Ù|Ù|Ù', r"{U`}", txt) # U-grave
|
||||
txt = re.sub(u'Ú|Ú|Ú', r"{U'}", txt) # U-acute
|
||||
txt = re.sub(u'Û|Û|Û', r'{U^}', txt) # U-circumflex
|
||||
txt = re.sub(u'Û|Û|Û', r'{U^}', txt) # U-circumflex
|
||||
txt = re.sub(u'Ü|Ü|Ü', r'{U"}', txt) # U-umlaut
|
||||
txt = re.sub(u'Ý|Ý|Ý', r"{Y'}", txt) # Y-acute
|
||||
txt = re.sub(u'ß|ß|ß', r'{sz}', txt) # sharp-s
|
||||
txt = re.sub(u'à|à|à', r'{a`}', txt) # a-grave
|
||||
txt = re.sub(u'á|á|á', r"{a'}", txt) # a-acute
|
||||
txt = re.sub(u'â|â|â', r'{a^}', txt) # a-circumflex
|
||||
txt = re.sub(u'â|â|â', r'{a^}', txt) # a-circumflex
|
||||
txt = re.sub(u'ã|ã|ã', r'{a~}', txt) # a-tilde
|
||||
txt = re.sub(u'ä|ä|ä', r'{a"}', txt) # a-umlaut
|
||||
txt = re.sub(u'å|å|å', r'{ao}', txt) # a-ring
|
||||
@ -65,30 +65,58 @@ def unsmarten(txt):
|
||||
txt = re.sub(u'ç|ç|ç', r'{c,}', txt) # c-cedilla
|
||||
txt = re.sub(u'è|è|è', r'{e`}', txt) # e-grave
|
||||
txt = re.sub(u'é|é|é', r"{e'}", txt) # e-acute
|
||||
txt = re.sub(u'ê|ê|ê', r'{e^}', txt) # e-circumflex
|
||||
txt = re.sub(u'ê|ê|ê', r'{e^}', txt) # e-circumflex
|
||||
txt = re.sub(u'ë|ë|ë', r'{e"}', txt) # e-umlaut
|
||||
txt = re.sub(u'ì|ì|ì', r'{i`}', txt) # i-grave
|
||||
txt = re.sub(u'í|í|í', r"{i'}", txt) # i-acute
|
||||
txt = re.sub(u'î|î|î', r'{i^}', txt) # i-circumflex
|
||||
txt = re.sub(u'î|î|î', r'{i^}', txt) # i-circumflex
|
||||
txt = re.sub(u'ï|ï|ï', r'{i"}', txt) # i-umlaut
|
||||
txt = re.sub(u'ð|ð|ð', r'{d-}', txt) # eth
|
||||
txt = re.sub(u'ñ|ñ|ñ', r'{n~}', txt) # n-tilde
|
||||
txt = re.sub(u'ò|ò|ò', r'{o`}', txt) # o-grave
|
||||
txt = re.sub(u'ó|ó|ó', r"{o'}", txt) # o-acute
|
||||
txt = re.sub(u'ô|ô|ô', r'{o^}', txt) # o-circumflex
|
||||
txt = re.sub(u'ô|ô|ô', r'{o^}', txt) # o-circumflex
|
||||
txt = re.sub(u'õ|õ|õ', r'{o~}', txt) # o-tilde
|
||||
txt = re.sub(u'ö|ö|ö', r'{o"}', txt) # o-umlaut
|
||||
txt = re.sub(u'ø|ø|ø', r'{o/}', txt) # o-stroke
|
||||
txt = re.sub(u'ù|ù|ù', r'{u`}', txt) # u-grave
|
||||
txt = re.sub(u'ú|ú|ú', r"{u'}", txt) # u-acute
|
||||
txt = re.sub(u'û|û|û', r'{u^}', txt) # u-circumflex
|
||||
txt = re.sub(u'û|û|û', r'{u^}', txt) # u-circumflex
|
||||
txt = re.sub(u'ü|ü|ü', r'{u"}', txt) # u-umlaut
|
||||
txt = re.sub(u'ý|ý|ý', r"{y'}", txt) # y-acute
|
||||
txt = re.sub(u'ÿ|ÿ|ÿ', r'{y"}', txt) # y-umlaut
|
||||
|
||||
txt = re.sub(u'Č|Č|Č', r'{Cˇ}', txt) # C-caron
|
||||
txt = re.sub(u'č|č|č', r'{cˇ}', txt) # c-caron
|
||||
txt = re.sub(u'Ď|Ď|Ď', r'{Dˇ}', txt) # D-caron
|
||||
txt = re.sub(u'ď|ď|ď', r'{dˇ}', txt) # d-caron
|
||||
txt = re.sub(u'Ě|Ě|Ě', r'{Eˇ}', txt) # E-caron
|
||||
txt = re.sub(u'ě|ě|ě', r'{eˇ}', txt) # e-caron
|
||||
txt = re.sub(u'Ĺ|Ĺ|Ĺ', r"{L'}", txt) # L-acute
|
||||
txt = re.sub(u'ĺ|ĺ|ĺ', r"{l'}", txt) # l-acute
|
||||
txt = re.sub(u'Ľ|Ľ|Ľ', r'{Lˇ}', txt) # L-caron
|
||||
txt = re.sub(u'ľ|ľ|ľ', r'{lˇ}', txt) # l-caron
|
||||
txt = re.sub(u'Ň|Ň|Ň', r'{Nˇ}', txt) # N-caron
|
||||
txt = re.sub(u'ň|ň|ň', r'{nˇ}', txt) # n-caron
|
||||
|
||||
txt = re.sub(u'Œ|Œ|Œ', r'{OE}', txt) # OE
|
||||
txt = re.sub(u'œ|œ|œ', r'{oe}', txt) # oe
|
||||
txt = re.sub(u'Ŝ|Š|Ŝ', r'{S^}', txt) # Scaron
|
||||
txt = re.sub(u'ŝ|š|ŝ', r'{s^}', txt) # scaron
|
||||
|
||||
txt = re.sub(u'Ŕ|Ŕ|Ŕ', r"{R'}", txt) # R-acute
|
||||
txt = re.sub(u'ŕ|ŕ|ŕ', r"{r'}", txt) # r-acute
|
||||
txt = re.sub(u'Ř|Ř|Ř', r'{Rˇ}', txt) # R-caron
|
||||
txt = re.sub(u'ř|ř|ř', r'{rˇ}', txt) # r-caron
|
||||
txt = re.sub(u'Ŝ|Ŝ', r'{S^}', txt) # S-circumflex
|
||||
txt = re.sub(u'ŝ|ŝ', r'{s^}', txt) # s-circumflex
|
||||
txt = re.sub(u'Š|Š|Š', r'{Sˇ}', txt) # S-caron
|
||||
txt = re.sub(u'š|š|š', r'{sˇ}', txt) # s-caron
|
||||
txt = re.sub(u'Ť|Ť|Ť', r'{Tˇ}', txt) # T-caron
|
||||
txt = re.sub(u'ť|ť|ť', r'{tˇ}', txt) # t-caron
|
||||
txt = re.sub(u'Ů|Ů|Ů', r'{U°}', txt) # U-ring
|
||||
txt = re.sub(u'ů|ů|ů', r'{u°}', txt) # u-ring
|
||||
txt = re.sub(u'Ž|Ž|Ž', r'{Zˇ}', txt) # Z-caron
|
||||
txt = re.sub(u'ž|ž|ž', r'{zˇ}', txt) # z-caron
|
||||
|
||||
txt = re.sub(u'•|•|•', r'{*}', txt) # bullet
|
||||
txt = re.sub(u'₣|₣', r'{Fr}', txt) # Franc
|
||||
txt = re.sub(u'₤|₤', r'{L=}', txt) # Lira
|
||||
|
Loading…
x
Reference in New Issue
Block a user