mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
TXT Input: remove unnecessary try block. Rework markdown and textile detection.
This commit is contained in:
parent
626f1b2558
commit
9585ba655c
@@ -94,11 +94,7 @@ class TXTInput(InputFormatPlugin):
|
|||||||
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
||||||
elif options.formatting_type == 'textile':
|
elif options.formatting_type == 'textile':
|
||||||
log.debug('Running text though textile conversion...')
|
log.debug('Running text though textile conversion...')
|
||||||
try:
|
html = convert_textile(txt)
|
||||||
html = convert_textile(txt)
|
|
||||||
except RuntimeError:
|
|
||||||
raise ValueError('This txt file has malformed markup, it cannot be'
|
|
||||||
' converted by calibre.')
|
|
||||||
else:
|
else:
|
||||||
# Determine the paragraph type of the document.
|
# Determine the paragraph type of the document.
|
||||||
if options.paragraph_type == 'auto':
|
if options.paragraph_type == 'auto':
|
||||||
|
@@ -162,38 +162,33 @@ def detect_paragraph_type(txt):
|
|||||||
|
|
||||||
|
|
||||||
def detect_formatting_type(txt):
|
def detect_formatting_type(txt):
|
||||||
|
markdown_count = 0
|
||||||
|
textile_count = 0
|
||||||
|
|
||||||
# Check for markdown
|
# Check for markdown
|
||||||
# Headings
|
# Headings
|
||||||
if len(re.findall('(?mu)^#+', txt)) >= 5:
|
markdown_count += len(re.findall('(?mu)^#+', txt))
|
||||||
return 'markdown'
|
markdown_count += len(re.findall('(?mu)^=+$', txt))
|
||||||
if len(re.findall('(?mu)^=+$', txt)) >= 5:
|
markdown_count += len(re.findall('(?mu)^-+$', txt))
|
||||||
return 'markdown'
|
|
||||||
if len(re.findall('(?mu)^-+$', txt)) >= 5:
|
|
||||||
return 'markdown'
|
|
||||||
# Images
|
# Images
|
||||||
if len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) >= 5:
|
markdown_count += len(re.findall('(?u)!\[.*?\]\(.+?\)', txt))
|
||||||
return 'markdown'
|
|
||||||
# Links
|
# Links
|
||||||
if len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt)) >= 5:
|
markdown_count += len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt))
|
||||||
return 'markdown'
|
|
||||||
# Escaped characters
|
|
||||||
md_escapted_characters = ['\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!']
|
|
||||||
for c in md_escapted_characters:
|
|
||||||
if txt.count('\\'+c) > 10:
|
|
||||||
return 'markdown'
|
|
||||||
|
|
||||||
# Check for textile
|
# Check for textile
|
||||||
# Headings
|
# Headings
|
||||||
if len(re.findall(r'h[1-6]\.', txt)) >= 5:
|
textile_count += len(re.findall(r'(?mu)^h[1-6]\.', txt))
|
||||||
return 'textile'
|
|
||||||
# Block quote.
|
# Block quote.
|
||||||
if len(re.findall(r'bq\.', txt)) >= 5:
|
textile_count += len(re.findall(r'(?mu)^bq\.', txt))
|
||||||
return 'textile'
|
|
||||||
# Images
|
# Images
|
||||||
if len(re.findall(r'\![^\s]+(:[^\s]+)*', txt)) >= 5:
|
textile_count += len(re.findall(r'\![^\s]+(:[^\s]+)*', txt))
|
||||||
return 'textile'
|
|
||||||
# Links
|
# Links
|
||||||
if len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt)) >= 5:
|
textile_count += len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt))
|
||||||
return 'textile'
|
|
||||||
|
if markdown_count > 5 or textile_count > 5:
|
||||||
|
if markdown_count > textile_count:
|
||||||
|
return 'markdown'
|
||||||
|
else:
|
||||||
|
return 'textile'
|
||||||
|
|
||||||
return 'heuristic'
|
return 'heuristic'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user