Conversion heuristics: Make unwrapping recognize Georgian letters as well. Fixes #1753533 [Unwrapping fails on non-latin scripts](https://bugs.launchpad.net/calibre/+bug/1753533)

This commit is contained in:
Kovid Goyal 2018-03-08 13:50:21 +05:30
parent a31de75a39
commit 6dbf46c43e
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -358,7 +358,7 @@ class HeuristicProcessor(object):
# define the pieces of the regex # define the pieces of the regex
# (?<!\&\w{4});) is a semicolon not part of an entity # (?<!\&\w{4});) is a semicolon not part of an entity
lookahead = "(?<=.{"+unicode(length)+u"}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))" lookahead = "(?<=.{"+unicode(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))"
em_en_lookahead = "(?<=.{"+unicode(length)+u"}[\u2013\u2014])" em_en_lookahead = "(?<=.{"+unicode(length)+u"}[\u2013\u2014])"
soft_hyphen = u"\xad" soft_hyphen = u"\xad"
line_ending = "\s*(?P<style_close></(span|[iub])>)?\s*(</(p|div)>)?" line_ending = "\s*(?P<style_close></(span|[iub])>)?\s*(</(p|div)>)?"