diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 5fceeb7aed..691aa307d7 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -224,6 +224,10 @@ class Dehyphenator(object): return firsthalf+u'\u2014'+wraptags+secondhalf else: + if self.format == 'individual_words' and len(firsthalf) + len(secondhalf) <= 6: + if self.verbose > 2: + self.log("too short, returned hyphenated word: " + str(hyphenated)) + return hyphenated if len(firsthalf) <= 2 and len(secondhalf) <= 2: if self.verbose > 2: self.log("too short, returned hyphenated word: " + str(hyphenated)) diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 9fd8af0d70..43aadc6576 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -175,9 +175,9 @@ def detect_formatting_type(txt): # Block quote. textile_count += len(re.findall(r'(?mu)^bq\.', txt)) # Images - textile_count += len(re.findall(r'\![^\s]+(:[^\s]+)*', txt)) + textile_count += len(re.findall(r'\![^\s]+(?=.*?/)(:[^\s]+)*', txt)) # Links - textile_count += len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt)) + textile_count += len(re.findall(r'"(?=".*?\()(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt)) if markdown_count > 5 or textile_count > 5: if markdown_count > textile_count: