diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 4dd6e7c7ae..6f8e94f180 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -16,7 +16,6 @@ import uuid
from lxml import etree
-from calibre import guess_type
from calibre import prepare_string_for_xml
from calibre.constants import __appname__, __version__
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
@@ -41,7 +40,7 @@ class FB2MLizer(object):
# in different directories. FB2 images are all in a flat layout so we rename all images
# into a sequential numbering system to ensure there are no collisions between image names.
self.image_hrefs = {}
- # Mapping of toc items and their
+ # Mapping of toc items and their
self.toc = {}
# Used to see whether a new \s*
', '
\n\n', text) - + text = re.sub(r'(?miu)
some textile
' + """ + self.html_type = html_type + + # text = unicode(text) + text = _normalize_newlines(text) + + if self.restricted: + text = self.encode_html(text, quotes=False) + + if rel: + self.rel = ' rel="%s"' % rel + + text = self.getRefs(text) + + text = self.block(text, int(head_offset)) + + text = self.retrieve(text) + + return text + + def pba(self, input, element=None): + """ + Parse block attributes. + + >>> t = Textile() + >>> t.pba(r'\3') + '' + >>> t.pba(r'\\3', element='td') + ' colspan="3"' + >>> t.pba(r'/4', element='td') + ' rowspan="4"' + >>> t.pba(r'\\3/4', element='td') + ' colspan="3" rowspan="4"' + + >>> t.vAlign('^') + 'top' + + >>> t.pba('^', element='td') + ' style="vertical-align:top;"' + + >>> t.pba('{line-height:18px}') + ' style="line-height:18px;"' + + >>> t.pba('(foo-bar)') + ' class="foo-bar"' + + >>> t.pba('(#myid)') + ' id="myid"' + + >>> t.pba('(foo-bar#myid)') + ' class="foo-bar" id="myid"' + + >>> t.pba('((((') + ' style="padding-left:4em;"' + + >>> t.pba(')))') + ' style="padding-right:3em;"' + + >>> t.pba('[fr]') + ' lang="fr"' + + """ + style = [] + aclass = '' + lang = '' + colspan = '' + rowspan = '' + id = '' + + if not input: + return '' + + matched = input + if element == 'td': + m = re.search(r'\\(\d+)', matched) + if m: + colspan = m.group(1) + + m = re.search(r'/(\d+)', matched) + if m: + rowspan = m.group(1) + + if element == 'td' or element == 'tr': + m = re.search(r'(%s)' % self.vlgn, matched) + if m: + style.append("vertical-align:%s;" % self.vAlign(m.group(1))) + + m = re.search(r'\{([^}]*)\}', matched) + if m: + style.append(m.group(1).rstrip(';') + ';') + matched = matched.replace(m.group(0), '') + + m = re.search(r'\[([^\]]+)\]', matched, re.U) + if m: + lang = m.group(1) + matched = matched.replace(m.group(0), '') + + m = re.search(r'\(([^()]+)\)', matched, re.U) + if m: + aclass = m.group(1) + matched = matched.replace(m.group(0), '') + + m = re.search(r'([(]+)', matched) + if m: + style.append("padding-left:%sem;" % len(m.group(1))) + matched = matched.replace(m.group(0), '') + + m = re.search(r'([)]+)', matched) + if m: + style.append("padding-right:%sem;" % len(m.group(1))) + matched = matched.replace(m.group(0), '') + + m = re.search(r'(%s)' % self.hlgn, matched) + if m: + style.append("text-align:%s;" % self.hAlign(m.group(1))) + + m = re.search(r'^(.*)#(.*)$', aclass) + if m: + id = m.group(2) + aclass = m.group(1) + + if self.restricted: + if lang: + return ' lang="%s"' + else: + return '' + + result = [] + if style: + result.append(' style="%s"' % "".join(style)) + if aclass: + result.append(' class="%s"' % aclass) + if lang: + result.append(' lang="%s"' % lang) + if id: + result.append(' id="%s"' % id) + if colspan: + result.append(' colspan="%s"' % colspan) + if rowspan: + result.append(' rowspan="%s"' % rowspan) + return ''.join(result) + + def hasRawText(self, text): + """ + checks whether the text has text not already enclosed by a block tag + + >>> t = Textile() + >>> t.hasRawText('foo bar biz baz
') + False + + >>> t.hasRawText(' why yes, yes it does') + True + + """ + r = re.compile(r'<(p|blockquote|div|form|table|ul|ol|pre|h\d)[^>]*?>.*\1>', re.S).sub('', text.strip()).strip() + r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r) + return '' != r + + def table(self, text): + r""" + >>> t = Textile() + >>> t.table('|one|two|three|\n|a|b|c|') + '\tone | \n\t\t\ttwo | \n\t\t\tthree | \n\t\t
a | \n\t\t\tb | \n\t\t\tc | \n\t\t
\\n', '\\t\\t') + + >>> t.fBlock("bq", "", None, "http://google.com", "Hello BlockQuote") + ('\\t', 'Hello BlockQuote', '
', '\\n\\t
\\n', '\\t\\t') + + >>> t.fBlock("bc", "", None, "", 'printf "Hello, World";') # doctest: +ELLIPSIS + ('', 'Hello BlockQuote', '
', '\\n\\t
', '', ..., '
', '
')
+
+ >>> t.fBlock("h1", "", None, "", "foobar")
+ ('', '\\t\n" % (cite, atts) + o2 = "\t\t" + + elif tag == 'bc': + o1 = "" % atts + c2 = "
" + c1 = "\n\t
" % atts
+ o2 = "" % atts
+ c2 = "
"
+ c1 = "
"
+ content = self.shelve(self.encode_html(content.rstrip("\n") + "\n"))
+
+ elif tag == 'notextile':
+ content = self.shelve(content)
+ o1 = o2 = ''
+ c1 = c2 = ''
+
+ elif tag == 'pre':
+ content = self.shelve(self.encode_html(content.rstrip("\n") + "\n"))
+ o1 = "" % atts + o2 = c2 = '' + c1 = '' + + else: + o2 = "\t<%s%s>" % (tag, atts) + c2 = "%s>" % tag + + content = self.graf(content) + return o1, o2, content, c2, c1 + + def footnoteRef(self, text): + """ + >>> t = Textile() + >>> t.footnoteRef('foo[1] ') # doctest: +ELLIPSIS + 'foo1 ' + """ + return re.sub(r'\b\[([0-9]+)\](\s)?', self.footnoteID, text) + + def footnoteID(self, match): + id, t = match.groups() + if id not in self.fn: + self.fn[id] = str(uuid.uuid4()) + fnid = self.fn[id] + if not t: + t = '' + return '%s%s' % (fnid, id, t) + + def glyphs(self, text): + """ + >>> t = Textile() + + >>> t.glyphs("apostrophe's") + 'apostrophe’s' + + >>> t.glyphs("back in '88") + 'back in ’88' + + >>> t.glyphs('foo ...') + 'foo …' + + >>> t.glyphs('--') + '—' + + >>> t.glyphs('FooBar[tm]') + 'FooBar™' + + >>> t.glyphs("
Cat's Cradle by Vonnegut
") + 'Cat’s Cradle by Vonnegut
' + + """ + # fix: hackish + text = re.sub(r'"\Z', '\" ', text) + + glyph_search = ( + re.compile(r"(\w)\'(\w)"), # apostrophe's + re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88 + re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing + re.compile(r'\'/'), # single opening + re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing + re.compile(r'"'), # double opening + re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), # 3+ uppercase acronym + re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), # 3+ uppercase + re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis + re.compile(r'(\s?)--(\s?)'), # em dash + re.compile(r'\s-(?:\s|$)'), # en dash + re.compile(r'(\d+)( ?)x( ?)(?=\d+)'), # dimension sign + re.compile(r'\b ?[([]TM[])]', re.I), # trademark + re.compile(r'\b ?[([]R[])]', re.I), # registered + re.compile(r'\b ?[([]C[])]', re.I), # copyright + ) + + glyph_replace = [x % dict(self.glyph_defaults) for x in ( + r'\1%(txt_apostrophe)s\2', # apostrophe's + r'\1%(txt_apostrophe)s\2', # back in '88 + r'\1%(txt_quote_single_close)s', # single closing + r'%(txt_quote_single_open)s', # single opening + r'\1%(txt_quote_double_close)s', # double closing + r'%(txt_quote_double_open)s', # double opening + r'\1', # 3+ uppercase acronym + r'\1', # 3+ uppercase + r'\1%(txt_ellipsis)s', # ellipsis + r'\1%(txt_emdash)s\2', # em dash + r' %(txt_endash)s ', # en dash + r'\1\2%(txt_dimension)s\3', # dimension sign + r'%(txt_trademark)s', # trademark + r'%(txt_registered)s', # registered + r'%(txt_copyright)s', # copyright + )] + + result = [] + for line in re.compile(r'(<.*?>)', re.U).split(text): + if not re.search(r'<.*>', line): + for s, r in zip(glyph_search, glyph_replace): + line = s.sub(r, line) + result.append(line) + return ''.join(result) + + def vAlign(self, input): + d = {'^':'top', '-':'middle', '~':'bottom'} + return d.get(input, '') + + def hAlign(self, input): + d = {'<':'left', '=':'center', '>':'right', '<>': 'justify'} + return d.get(input, '') + + def getRefs(self, text): + """ + what is this for? + """ + pattern = re.compile(r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:http(?:s?):\/\/|\/)\S+)(?=\s|$)', re.U) + text = pattern.sub(self.refs, text) + return text + + def refs(self, match): + flag, url = match.groups() + self.urlrefs[flag] = url + return '' + + def checkRefs(self, url): + return self.urlrefs.get(url, url) + + def isRelURL(self, url): + """ + Identify relative urls. + + >>> t = Textile() + >>> t.isRelURL("http://www.google.com/") + False + >>> t.isRelURL("/foo") + True + + """ + (scheme, netloc) = urlparse(url)[0:2] + return not scheme and not netloc + + def relURL(self, url): + scheme = urlparse(url)[0] + if self.restricted and scheme and scheme not in self.url_schemes: + return '#' + return url + + def shelve(self, text): + id = str(uuid.uuid4()) + self.shelf[id] = text + return id + + def retrieve(self, text): + """ + >>> t = Textile() + >>> id = t.shelve("foobar") + >>> t.retrieve(id) + 'foobar' + """ + while True: + old = text + for k, v in self.shelf.items(): + text = text.replace(k, v) + if text == old: + break + return text + + def encode_html(self, text, quotes=True): + a = ( + ('&', '&'), + ('<', '<'), + ('>', '>') + ) + + if quotes: + a = a + ( + ("'", '''), + ('"', '"') + ) + + for k, v in a: + text = text.replace(k, v) + return text + + def graf(self, text): + if not self.lite: + text = self.noTextile(text) + text = self.code(text) + + text = self.links(text) + + if not self.noimage: + text = self.image(text) + + if not self.lite: + text = self.lists(text) + text = self.table(text) + + text = self.span(text) + text = self.footnoteRef(text) + text = self.glyphs(text) + + return text.rstrip('\n') + + def links(self, text): + """ + >>> t = Textile() + >>> t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') # doctest: +ELLIPSIS + 'fooobar ... and hello world ...' + """ + + punct = '!"#$%&\'*+,-./:;=?@\\^_`|~' + + pattern = r''' + (?P[\s\[{(]|[%s] )? + " # start + (?P%s ) + (?P [^"]+? ) + \s? + (?: \(([^)]+?)\)(?=") )? # $title + ": + (?P (?:ftp|https?)? (?: :// )? [-A-Za-z0-9+&@#/?=~_()|!:,.;]*[-A-Za-z0-9+&@#/=~_()|] ) + (?P [^\w\/;]*? ) + (?=<|\s|$) + ''' % (re.escape(punct), self.c) + + text = re.compile(pattern, re.X).sub(self.fLink, text) + + return text + + def fLink(self, match): + pre, atts, text, title, url, post = match.groups() + + if pre == None: + pre = '' + + # assume ) at the end of the url is not actually part of the url + # unless the url also contains a ( + if url.endswith(')') and not url.find('(') > -1: + post = url[-1] + post + url = url[:-1] + + url = self.checkRefs(url) + + atts = self.pba(atts) + if title: + atts = atts + ' title="%s"' % self.encode_html(title) + + if not self.noimage: + text = self.image(text) + + text = self.span(text) + text = self.glyphs(text) + + url = self.relURL(url) + out = '%s' % (self.encode_html(url), atts, self.rel, text) + out = self.shelve(out) + return ''.join([pre, out, post]) + + def span(self, text): + """ + >>> t = Textile() + >>> t.span(r"hello %(bob)span *strong* and **bold**% goodbye") + 'hello span strong and bold goodbye' + """ + qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^') + pnct = ".,\"'?!;:" + + for qtag in qtags: + pattern = re.compile(r""" + (?:^|(?<=[\s>%(pnct)s])|([\]}])) + (%(qtag)s)(?!%(qtag)s) + (%(c)s) + (?::(\S+))? + ([^\s%(qtag)s]+|\S[^%(qtag)s\n]*[^\s%(qtag)s\n]) + ([%(pnct)s]*) + %(qtag)s + (?:$|([\]}])|(?=%(selfpnct)s{1,2}|\s)) + """ % {'qtag':qtag, 'c':self.c, 'pnct':pnct, + 'selfpnct':self.pnct}, re.X) + text = pattern.sub(self.fSpan, text) + return text + + + def fSpan(self, match): + _, tag, atts, cite, content, end, _ = match.groups() + + qtags = { + '*': 'strong', + '**': 'b', + '??': 'cite', + '_' : 'em', + '__': 'i', + '-' : 'del', + '%' : 'span', + '+' : 'ins', + '~' : 'sub', + '^' : 'sup' + } + tag = qtags[tag] + atts = self.pba(atts) + if cite: + atts = atts + 'cite="%s"' % cite + + content = self.span(content) + + out = "<%s%s>%s%s%s>" % (tag, atts, content, end, tag) + return out + + def image(self, text): + """ + >>> t = Textile() + >>> t.image('!/imgs/myphoto.jpg!:http://jsamsa.com') + ' ' + """ + pattern = re.compile(r""" + (?:[\[{])? # pre + \! # opening ! + (%s) # optional style,class atts + (?:\. )? # optional dot-space + ([^\s(!]+) # presume this is the src + \s? # optional space + (?:\(([^\)]+)\))? # optional title + \! # closing + (?::(\S+))? # optional href + (?:[\]}]|(?=\s|$)) # lookahead: space or end of string + """ % self.c, re.U|re.X) + return pattern.sub(self.fImage, text) + + def fImage(self, match): + # (None, '', '/imgs/myphoto.jpg', None, None) + atts, url, title, href = match.groups() + atts = self.pba(atts) + + if title: + atts = atts + ' title="%s" alt="%s"' % (title, title) + else: + atts = atts + ' alt=""' + + if not self.isRelURL(url) and self.get_sizes: + size = getimagesize(url) + if (size): + atts += " %s" % size + + if href: + href = self.checkRefs(href) + + url = self.checkRefs(url) + url = self.relURL(url) + + out = [] + if href: + out.append('' % href) + if self.html_type == 'html': + out.append('
' % (url, atts)) + else: + out.append('
' % (url, atts)) + if href: + out.append('') + + return ''.join(out) + + def code(self, text): + text = self.doSpecial(text, '
', '
', self.fCode) + text = self.doSpecial(text, '@', '@', self.fCode) + text = self.doSpecial(text, '', '', self.fPre) + return text + + def fCode(self, match): + before, text, after = match.groups() + if after == None: + after = '' + # text needs to be escaped + if not self.restricted: + text = self.encode_html(text) + return ''.join([before, self.shelve('%s
' % text), after]) + + def fPre(self, match): + before, text, after = match.groups() + if after == None: + after = '' + # text needs to be escapedd + if not self.restricted: + text = self.encode_html(text) + return ''.join([before, '', self.shelve(text), '', after]) + + def doSpecial(self, text, start, end, method=None): + if method == None: + method = self.fSpecial + pattern = re.compile(r'(^|\s|[\[({>])%s(.*?)%s(\s|$|[\])}])?' % (re.escape(start), re.escape(end)), re.M|re.S) + return pattern.sub(method, text) + + def fSpecial(self, match): + """ + special blocks like notextile or code + """ + before, text, after = match.groups() + if after == None: + after = '' + return ''.join([before, self.shelve(self.encode_html(text)), after]) + + def noTextile(self, text): + text = self.doSpecial(text, '', ' ', self.fTextile) + return self.doSpecial(text, '==', '==', self.fTextile) + + def fTextile(self, match): + before, notextile, after = match.groups() + if after == None: + after = '' + return ''.join([before, self.shelve(notextile), after]) + + +def textile(text, head_offset=0, html_type='xhtml', encoding=None, output=None): + """ + this function takes additional parameters: + head_offset - offset to apply to heading levels (default: 0) + html_type - 'xhtml' or 'html' style tags (default: 'xhtml') + """ + return Textile().textile(text, head_offset=head_offset, + html_type=html_type) + +def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'): + """ + Restricted version of Textile designed for weblog comments and other + untrusted input. + + Raw HTML is escaped. + Style attributes are disabled. + rel='nofollow' is added to external links. + + When lite=True is set (the default): + Block tags are restricted to p, bq, and bc. + Lists and tables are disabled. + + When noimage=True is set (the default): + Image tags are disabled. + + """ + return Textile(restricted=True, lite=lite, + noimage=noimage).textile(text, rel='nofollow', + html_type=html_type) + diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index aaff8b55c0..0b0bd6d570 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -12,7 +12,7 @@ from calibre.ebooks.chardet import detect from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \ - convert_heuristic, normalize_line_endings + convert_heuristic, normalize_line_endings, convert_textile from calibre import _ent_pat, xml_entity_to_unicode class TXTInput(InputFormatPlugin): @@ -41,6 +41,7 @@ class TXTInput(InputFormatPlugin): 'paragraph and no styling is applied.\n' '* heuristic: Process using heuristics to determine formatting such ' 'as chapter headings and italic text.\n' + '* textile: Processing using textile formatting.\n' '* markdown: Processing using markdown formatting. ' 'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'), OptionRecommendation(name='preserve_spaces', recommended_value=False, @@ -91,6 +92,9 @@ class TXTInput(InputFormatPlugin): except RuntimeError: raise ValueError('This txt file has malformed markup, it cannot be' ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax') + elif options.formatting_type == 'textile': + log.debug('Running text though textile conversion...') + html = convert_textile(txt) else: # Determine the paragraph type of the document. if options.paragraph_type == 'auto': diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 6a1a106681..3702bbfabe 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -7,7 +7,6 @@ Read content from txt file. import os, re from calibre import prepare_string_for_xml, isbytestring -from calibre.ebooks.markdown import markdown from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor from calibre.ebooks.conversion.preprocess import DocAnalysis @@ -37,7 +36,7 @@ def clean_txt(txt): chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) illegal_chars = re.compile(u'|'.join(map(unichr, chars))) txt = illegal_chars.sub('', txt) - + return txt def split_txt(txt, epub_split_size_kb=0): @@ -74,12 +73,18 @@ def convert_heuristic(txt, title='', epub_split_size_kb=0): return tp.convert(txt, title, epub_split_size_kb) def convert_markdown(txt, title='', disable_toc=False): + from calibre.ebooks.markdown import markdown md = markdown.Markdown( extensions=['footnotes', 'tables', 'toc'], extension_configs={"toc": {"disable_toc": disable_toc}}, safe_mode=False) return HTML_TEMPLATE % (title, md.convert(txt)) +def convert_textile(txt, title=''): + from calibre.ebooks.textile import textile + html = textile(txt, encoding='utf-8') + return HTML_TEMPLATE % (title, html) + def normalize_line_endings(txt): txt = txt.replace('\r\n', '\n') txt = txt.replace('\r', '\n') @@ -115,66 +120,75 @@ def split_string_separator(txt, size) : def detect_paragraph_type(txt): ''' Tries to determine the formatting of the document. - + block: Paragraphs are separated by a blank line. single: Each line is a paragraph. print: Each paragraph starts with a 2+ spaces or a tab and ends when a new paragraph is reached. unformatted: most lines have hard line breaks, few/no blank lines or indents - + returns block, single, print, unformatted ''' txt = txt.replace('\r\n', '\n') txt = txt.replace('\r', '\n') txt_line_count = len(re.findall('(?mu)^\s*.+$', txt)) - + # Check for hard line breaks - true if 55% of the doc breaks in the same region docanalysis = DocAnalysis('txt', txt) hardbreaks = docanalysis.line_histogram(.55) - + if hardbreaks: # Determine print percentage tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt)) print_percent = tab_line_count / float(txt_line_count) - + # Determine block percentage empty_line_count = len(re.findall('(?mu)^\s*$', txt)) block_percent = empty_line_count / float(txt_line_count) - + # Compare the two types - the type with the larger number of instances wins # in cases where only one or the other represents the vast majority of the document neither wins if print_percent >= block_percent: if .15 <= print_percent <= .75: return 'print' elif .15 <= block_percent <= .75: - return 'block' + return 'block' - # Assume unformatted text with hardbreaks if nothing else matches + # Assume unformatted text with hardbreaks if nothing else matches return 'unformatted' - + # return single if hardbreaks is false return 'single' def detect_formatting_type(txt): + markdown_count = 0 + textile_count = 0 + # Check for markdown # Headings - if len(re.findall('(?mu)^#+', txt)) >= 5: - return 'markdown' - if len(re.findall('(?mu)^=+$', txt)) >= 5: - return 'markdown' - if len(re.findall('(?mu)^-+$', txt)) >= 5: - return 'markdown' + markdown_count += len(re.findall('(?mu)^#+', txt)) + markdown_count += len(re.findall('(?mu)^=+$', txt)) + markdown_count += len(re.findall('(?mu)^-+$', txt)) # Images - if len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) >= 5: - return 'markdown' + markdown_count += len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) # Links - if len(re.findall('(?u)(^|(?P[^!]))\[.*?\]\([^)]+\)', txt)) >= 5: - return 'markdown' - # Escaped characters - md_escapted_characters = ['\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!'] - for c in md_escapted_characters: - if txt.count('\\'+c) > 10: + markdown_count += len(re.findall('(?u)(^|(?P[^!]))\[.*?\]\([^)]+\)', txt)) + + # Check for textile + # Headings + textile_count += len(re.findall(r'(?mu)^h[1-6]\.', txt)) + # Block quote. + textile_count += len(re.findall(r'(?mu)^bq\.', txt)) + # Images + textile_count += len(re.findall(r'\![^\s]+(:[^\s]+)*', txt)) + # Links + textile_count += len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt)) + + if markdown_count > 5 or textile_count > 5: + if markdown_count > textile_count: return 'markdown' - + else: + return 'textile' + return 'heuristic'