From 9585ba655c810bb9132f3d6d7299455d23d47493 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 11 Jan 2011 18:08:55 -0500
Subject: [PATCH] TXT Input: remove unnecessary try block. Rework markdown and
 textile detection.

---
 src/calibre/ebooks/txt/input.py     |  6 +----
 src/calibre/ebooks/txt/processor.py | 41 +++++++++++++----------------
 2 files changed, 19 insertions(+), 28 deletions(-)
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 73af3acde4..0b0bd6d570 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -94,11 +94,7 @@ class TXTInput(InputFormatPlugin):
                     ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
         elif options.formatting_type == 'textile':
             log.debug('Running text though textile conversion...')
-            try:
-                html = convert_textile(txt)
-            except RuntimeError:
-                raise ValueError('This txt file has malformed markup, it cannot be'
-                    ' converted by calibre.')
+            html = convert_textile(txt)
         else:
             # Determine the paragraph type of the document.
             if options.paragraph_type == 'auto':
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index d0526bd9fc..d59fd4121a 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -162,38 +162,33 @@ def detect_paragraph_type(txt):
 
 
 def detect_formatting_type(txt):
+    markdown_count = 0
+    textile_count = 0
+    
     # Check for markdown
     # Headings
-    if len(re.findall('(?mu)^#+', txt)) >= 5:
-        return 'markdown'
-    if len(re.findall('(?mu)^=+$', txt)) >= 5:
-        return 'markdown'
-    if len(re.findall('(?mu)^-+$', txt)) >= 5:
-        return 'markdown'
+    markdown_count += len(re.findall('(?mu)^#+', txt)) 
+    markdown_count += len(re.findall('(?mu)^=+$', txt))
+    markdown_count += len(re.findall('(?mu)^-+$', txt))
     # Images
-    if len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) >= 5:
-        return 'markdown'
+    markdown_count += len(re.findall('(?u)!\[.*?\]\(.+?\)', txt))
     # Links
-    if len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt)) >= 5:
-        return 'markdown'
-    # Escaped characters
-    md_escapted_characters = ['\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!']
-    for c in md_escapted_characters:
-        if txt.count('\\'+c) > 10:
-            return 'markdown'
+    markdown_count += len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt))
         
     # Check for textile
     # Headings
-    if len(re.findall(r'h[1-6]\.', txt)) >= 5:
-        return 'textile'
+    textile_count += len(re.findall(r'(?mu)^h[1-6]\.', txt))
     # Block quote.
-    if len(re.findall(r'bq\.', txt)) >= 5:
-        return 'textile'
+    textile_count += len(re.findall(r'(?mu)^bq\.', txt))
     # Images
-    if len(re.findall(r'\![^\s]+(:[^\s]+)*', txt)) >= 5:
-        return 'textile'
+    textile_count += len(re.findall(r'\![^\s]+(:[^\s]+)*', txt))
     # Links
-    if len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt)) >= 5:
-        return 'textile'
+    textile_count += len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt))
+    
+    if markdown_count > 5 or textile_count > 5:
+        if markdown_count > textile_count:
+            return 'markdown'
+        else:
+            return 'textile'
     
     return 'heuristic'