TXT Input: remove unnecessary try block. Rework markdown and textile detection.

2025-07-09 03:04:10 -04:00 · 2011-01-11 18:08:55 -05:00 · 2011-01-11 18:08:55 -05:00 · 9585ba655c
commit 9585ba655c
parent 626f1b2558
2 changed files with 19 additions and 28 deletions
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -94,11 +94,7 @@ class TXTInput(InputFormatPlugin):
                    ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
        elif options.formatting_type == 'textile':
            log.debug('Running text though textile conversion...')
-            try:
+            html = convert_textile(txt)
-                html = convert_textile(txt)
-            except RuntimeError:
-                raise ValueError('This txt file has malformed markup, it cannot be'
-                    ' converted by calibre.')
        else:
            # Determine the paragraph type of the document.
            if options.paragraph_type == 'auto':
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -162,38 +162,33 @@ def detect_paragraph_type(txt):
 def detect_formatting_type(txt):
    markdown_count = 0
    textile_count = 0
    # Check for markdown
    # Headings
-    if len(re.findall('(?mu)^#+', txt)) >= 5:
+    markdown_count += len(re.findall('(?mu)^#+', txt)) 
-        return 'markdown'
+    markdown_count += len(re.findall('(?mu)^=+$', txt))
-    if len(re.findall('(?mu)^=+$', txt)) >= 5:
+    markdown_count += len(re.findall('(?mu)^-+$', txt))
-        return 'markdown'
-    if len(re.findall('(?mu)^-+$', txt)) >= 5:
-        return 'markdown'
    # Images
-    if len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) >= 5:
+    markdown_count += len(re.findall('(?u)!\[.*?\]\(.+?\)', txt))
-        return 'markdown'
    # Links
-    if len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt)) >= 5:
+    markdown_count += len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt))
-        return 'markdown'
    # Escaped characters
    md_escapted_characters = ['\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!']
    for c in md_escapted_characters:
        if txt.count('\\'+c) > 10:
            return 'markdown'
    # Check for textile
    # Headings
-    if len(re.findall(r'h[1-6]\.', txt)) >= 5:
+    textile_count += len(re.findall(r'(?mu)^h[1-6]\.', txt))
-        return 'textile'
    # Block quote.
-    if len(re.findall(r'bq\.', txt)) >= 5:
+    textile_count += len(re.findall(r'(?mu)^bq\.', txt))
-        return 'textile'
    # Images
-    if len(re.findall(r'\![^\s]+(:[^\s]+)*', txt)) >= 5:
+    textile_count += len(re.findall(r'\![^\s]+(:[^\s]+)*', txt))
-        return 'textile'
    # Links
-    if len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt)) >= 5:
+    textile_count += len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt))
-        return 'textile'
+    
    if markdown_count > 5 or textile_count > 5:
        if markdown_count > textile_count:
            return 'markdown'
        else:
            return 'textile'
    return 'heuristic'