mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix regex for py3
This commit is contained in:
parent
260c330da3
commit
b0fe64571e
@@ -602,7 +602,7 @@ class HTMLPreProcessor(object):
|
|||||||
end_rules.append(
|
end_rules.append(
|
||||||
# Un wrap using punctuation
|
# Un wrap using punctuation
|
||||||
(re.compile((
|
(re.compile((
|
||||||
r'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]'
|
r'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IAß]'
|
||||||
r'|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?'
|
r'|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?'
|
||||||
r'\s*[\w\d$(])') % length, re.UNICODE), wrap_lines),
|
r'\s*[\w\d$(])') % length, re.UNICODE), wrap_lines),
|
||||||
)
|
)
|
||||||
|
@@ -367,7 +367,7 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
# define the pieces of the regex
|
# define the pieces of the regex
|
||||||
# (?<!\&\w{4});) is a semicolon not part of an entity
|
# (?<!\&\w{4});) is a semicolon not part of an entity
|
||||||
lookahead = "(?<=.{"+unicode_type(length)+"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\\&\\w{4});))"
|
lookahead = "(?<=.{"+unicode_type(length)+r"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IAß]|(?<!\&\w{4});))"
|
||||||
em_en_lookahead = "(?<=.{"+unicode_type(length)+"}[\u2013\u2014])"
|
em_en_lookahead = "(?<=.{"+unicode_type(length)+"}[\u2013\u2014])"
|
||||||
soft_hyphen = "\xad"
|
soft_hyphen = "\xad"
|
||||||
line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?"
|
line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user