mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion heuristics: Make unwrapping recognize Georgian letters as well. Fixes #1753533 [Unwrapping fails on non-latin scripts](https://bugs.launchpad.net/calibre/+bug/1753533)
This commit is contained in:
parent
a31de75a39
commit
6dbf46c43e
@ -358,7 +358,7 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
# define the pieces of the regex
|
# define the pieces of the regex
|
||||||
# (?<!\&\w{4});) is a semicolon not part of an entity
|
# (?<!\&\w{4});) is a semicolon not part of an entity
|
||||||
lookahead = "(?<=.{"+unicode(length)+u"}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))"
|
lookahead = "(?<=.{"+unicode(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))"
|
||||||
em_en_lookahead = "(?<=.{"+unicode(length)+u"}[\u2013\u2014])"
|
em_en_lookahead = "(?<=.{"+unicode(length)+u"}[\u2013\u2014])"
|
||||||
soft_hyphen = u"\xad"
|
soft_hyphen = u"\xad"
|
||||||
line_ending = "\s*(?P<style_close></(span|[iub])>)?\s*(</(p|div)>)?"
|
line_ending = "\s*(?P<style_close></(span|[iub])>)?\s*(</(p|div)>)?"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user