mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More textile work.
This commit is contained in:
parent
804b248d46
commit
be3d441d3b
@ -70,7 +70,6 @@ class TXTOutput(OutputFormatPlugin):
|
|||||||
])
|
])
|
||||||
|
|
||||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
print 'New'
|
|
||||||
if opts.txt_output_formatting.lower() == 'markdown':
|
if opts.txt_output_formatting.lower() == 'markdown':
|
||||||
from calibre.ebooks.txt.markdownml import MarkdownMLizer
|
from calibre.ebooks.txt.markdownml import MarkdownMLizer
|
||||||
self.writer = MarkdownMLizer(log)
|
self.writer = MarkdownMLizer(log)
|
||||||
|
@ -58,31 +58,39 @@ class TextileMLizer(OEB2HTML):
|
|||||||
return ''.join(output)
|
return ''.join(output)
|
||||||
|
|
||||||
def tidy_up(self, text):
|
def tidy_up(self, text):
|
||||||
def check_count(text, tests):
|
# def check_count(text, tests):
|
||||||
x = []
|
# x = []
|
||||||
for i, t in enumerate(reversed(tests)):
|
# for i, t in enumerate(reversed(tests)):
|
||||||
x.append((text.count(t), i, t))
|
# x.append((text.count(t), i, t))
|
||||||
if x:
|
# if x:
|
||||||
return sorted(x, key=itemgetter(0, 1), reverse=True)[0][2]
|
# return sorted(x, key=itemgetter(0, 1), reverse=True)[0][2]
|
||||||
return ''
|
# return ''
|
||||||
|
|
||||||
# NEEDS TWEAKING
|
# Needs tweaking and finetuning - don't use yet.
|
||||||
# def check_escaping(text, tests):
|
def check_escaping(text, tests):
|
||||||
# for t in tests:
|
for t in tests:
|
||||||
# text = re.sub(r'(\S)('+t+'\w+'+t+')', r'\1[\2]', text)
|
text = re.sub(r'(\S)'+t+t+'(\S)', r'\1\2', text)
|
||||||
# text = re.sub(r'(\s)('+t+'\w+'+t+')(\S)', r'\1[\2]\3', text)
|
# text = re.sub(r'(\w)('+t+'\w+'+t+')', r'\1[\2]', text)
|
||||||
# return text
|
# text = re.sub(r'('+t+'\w+'+t+')(\w)', r'[\1]\2', text)
|
||||||
|
# text = re.sub(r'(["\'])\[('+t+'\w+'+t+')\]', r'\1\2', text)
|
||||||
|
# text = re.sub(r'\[('+t+'\w+'+t+')\](["\',\.!\?])', r'\1\2', text)
|
||||||
|
return text
|
||||||
|
|
||||||
txt = check_count(text, ['\np<. ', '\np<>. ', '\np. '])
|
# txt = check_count(text, ['\np<. ', '\np<>. ', '\np. '])
|
||||||
text = re.sub(txt+'(\S)', r'\n\1', text)
|
# text = re.sub(txt+'(\S)', r'\n\1', text)
|
||||||
|
|
||||||
# text = check_escaping(text, ['\^', '\*', '_', '\+', '~', '-'])
|
text = check_escaping(text, ['\^', '\*', '_', '\+', '~', '-'])
|
||||||
|
|
||||||
text = re.sub('\npre\. bc\.', '\nbc.', text)
|
text = re.sub(r'^\n+', r'', text)
|
||||||
text = re.sub('\np=. p. ', '\np. ', text)
|
text = re.sub(r'\npre\. bc\.', r'\nbc.', text)
|
||||||
text = re.sub('\np=. \n', '\n', text)
|
text = re.sub(r'\nbq\. \n\np\. ', r'\nbq. ', text)
|
||||||
text = re.sub('\n{3,}', '\n\n', text)
|
text = re.sub(r'\n{4,}', r'\n\np. \n\n', text)
|
||||||
text = re.sub(' \|', '|', text)
|
text = re.sub(r'\n{3}', r'\n\n', text)
|
||||||
|
text = re.sub(r'(p.*\. \n?)(p.*\. )', r'\2', text)
|
||||||
|
text = re.sub(r'p.*\. \n\n', r'', text)
|
||||||
|
# text = re.sub(u'\n \n', r'\n<br />\n', text) # blank paragraph - br tag
|
||||||
|
text = re.sub(u'p.*\. \xa0', r'p. ', text) # blank paragraph
|
||||||
|
text = re.sub(r' \|', r'|', text)
|
||||||
|
|
||||||
# started work on trying to fix footnotes
|
# started work on trying to fix footnotes
|
||||||
# text = re.sub(r'\^"(\d+)":#.+\^', r'[\1]', text)
|
# text = re.sub(r'\^"(\d+)":#.+\^', r'[\1]', text)
|
||||||
@ -95,19 +103,28 @@ class TextileMLizer(OEB2HTML):
|
|||||||
# Condense redundant spaces created by replacing newlines with spaces.
|
# Condense redundant spaces created by replacing newlines with spaces.
|
||||||
text = re.sub(r'[ ]{2,}', ' ', text)
|
text = re.sub(r'[ ]{2,}', ' ', text)
|
||||||
text = re.sub(r'\t +', '', text)
|
text = re.sub(r'\t +', '', text)
|
||||||
|
# text = re.sub(r'\n +', '', text)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def remove_leading_ws(self, text):
|
def remove_leading_ws(self, text):
|
||||||
text = text.replace('\r\n', '\n')
|
text = text.replace('\r\n', '\n')
|
||||||
text = text.replace('\r', '\n')
|
text = text.replace('\r', '\n')
|
||||||
text = re.sub(r'\n+', '\n', text)
|
|
||||||
text = re.sub(r'\n[\t ]+', '\n', text)
|
text = re.sub(r'\n[\t ]+', '\n', text)
|
||||||
|
text = re.sub(r'\n{2,}', '\n', text)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def check_align(self, style, align, tests):
|
def check_halign(self, style):
|
||||||
|
tests = {'left':'<','justify':'<>','center':'=','right':'>'}
|
||||||
for i in tests:
|
for i in tests:
|
||||||
if style[align] == i[0]:
|
if style['text-align'] == i:
|
||||||
return i[1]
|
return tests[i]
|
||||||
|
return ''
|
||||||
|
|
||||||
|
def check_valign(self, style):
|
||||||
|
tests = {'top':'^','bottom':'~', 'middle':'-'}
|
||||||
|
for i in tests:
|
||||||
|
if style['vertical-align'] == i:
|
||||||
|
return tests[i]
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def check_padding(self, style, tests):
|
def check_padding(self, style, tests):
|
||||||
@ -124,15 +141,16 @@ class TextileMLizer(OEB2HTML):
|
|||||||
def check_id_tag(self, attribs):
|
def check_id_tag(self, attribs):
|
||||||
txt = ''
|
txt = ''
|
||||||
if attribs.has_key('id'):
|
if attribs.has_key('id'):
|
||||||
|
#if attribs['id'] in self.links:
|
||||||
txt = '(#'+attribs['id']+')'
|
txt = '(#'+attribs['id']+')'
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
def build_block(self, tag, style, attribs, finish):
|
def build_block(self, tag, style, attribs, finish):
|
||||||
txt = tag
|
txt = '\n' + tag
|
||||||
if self.opts.keep_links:
|
if self.opts.keep_links:
|
||||||
txt += self.check_id_tag(attribs)
|
txt += self.check_id_tag(attribs)
|
||||||
txt += self.check_padding(style, [['padding-left','('],['padding-right',')']])
|
txt += self.check_padding(style, [['padding-left','('],['padding-right',')']])
|
||||||
txt += self.check_align(style, 'text-align', [['left','<'],['justify','<>'],['center','='],['right','>']])
|
txt += self.check_halign(style)
|
||||||
txt += finish
|
txt += finish
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
@ -163,7 +181,17 @@ class TextileMLizer(OEB2HTML):
|
|||||||
or style['visibility'] == 'hidden':
|
or style['visibility'] == 'hidden':
|
||||||
return ['']
|
return ['']
|
||||||
|
|
||||||
if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p'):
|
# Soft scene breaks.
|
||||||
|
text.append(self.check_padding(style, ['margin-top',u'\n\n\xa0']))
|
||||||
|
|
||||||
|
if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div'):
|
||||||
|
#For debugging
|
||||||
|
if tag == 'h1':
|
||||||
|
for i in self.links:
|
||||||
|
text.append(i)
|
||||||
|
text.append('\n')
|
||||||
|
if tag == 'div':
|
||||||
|
tag = 'p'
|
||||||
text.append(self.build_block(tag, style, attribs, '. '))
|
text.append(self.build_block(tag, style, attribs, '. '))
|
||||||
tags.append('\n')
|
tags.append('\n')
|
||||||
|
|
||||||
@ -191,10 +219,10 @@ class TextileMLizer(OEB2HTML):
|
|||||||
tags.append('-')
|
tags.append('-')
|
||||||
self.style_strike = True
|
self.style_strike = True
|
||||||
if style['font-variant'] == 'small-caps':
|
if style['font-variant'] == 'small-caps':
|
||||||
if self.style_smallcap == 0:
|
if self.style_smallcap == False:
|
||||||
text.append('&')
|
text.append('&')
|
||||||
tags.append('&')
|
tags.append('&')
|
||||||
self.style_smallcap = 1
|
self.style_smallcap = True
|
||||||
if tag == 'br':
|
if tag == 'br':
|
||||||
text.append('')
|
text.append('')
|
||||||
tags.append('\n')
|
tags.append('\n')
|
||||||
@ -236,7 +264,10 @@ class TextileMLizer(OEB2HTML):
|
|||||||
tags.append('(' + attribs['title'] + ')')
|
tags.append('(' + attribs['title'] + ')')
|
||||||
elif tag == 'img':
|
elif tag == 'img':
|
||||||
if self.opts.keep_image_references:
|
if self.opts.keep_image_references:
|
||||||
text.append ('!' + attribs['src'])
|
txt = '!' + self.check_halign(style)
|
||||||
|
txt += self.check_valign(style)
|
||||||
|
txt += attribs['src']
|
||||||
|
text.append(txt)
|
||||||
if attribs.has_key('alt'):
|
if attribs.has_key('alt'):
|
||||||
txt = attribs['alt']
|
txt = attribs['alt']
|
||||||
if txt != '':
|
if txt != '':
|
||||||
@ -247,6 +278,7 @@ class TextileMLizer(OEB2HTML):
|
|||||||
text.append('')
|
text.append('')
|
||||||
tags.append(tag)
|
tags.append(tag)
|
||||||
elif tag == 'li':
|
elif tag == 'li':
|
||||||
|
# text.append('\n')
|
||||||
if self.list: li = self.list[-1]
|
if self.list: li = self.list[-1]
|
||||||
else: li = {'name':'ul', 'num':0}
|
else: li = {'name':'ul', 'num':0}
|
||||||
if li['name'] == 'ul': text.append('*'*len(self.list)+' ')
|
if li['name'] == 'ul': text.append('*'*len(self.list)+' ')
|
||||||
@ -273,8 +305,8 @@ class TextileMLizer(OEB2HTML):
|
|||||||
elif tag == 'td':
|
elif tag == 'td':
|
||||||
text.append('|')
|
text.append('|')
|
||||||
txt = ''
|
txt = ''
|
||||||
txt += self.check_align(style, 'text-align', [['left','<'],['justify','<>'],['center','='],['right','>']])
|
txt += self.check_halign(style)
|
||||||
txt += self.check_align(style, 'vertical-align', [['top','^'],['bottom','~']])
|
txt += self.check_valign(style)
|
||||||
if attribs.has_key ('colspan'):
|
if attribs.has_key ('colspan'):
|
||||||
txt += '\\' + attribs['colspan']
|
txt += '\\' + attribs['colspan']
|
||||||
if attribs.has_key ('rowspan'):
|
if attribs.has_key ('rowspan'):
|
||||||
@ -288,7 +320,10 @@ class TextileMLizer(OEB2HTML):
|
|||||||
|
|
||||||
if self.opts.keep_links and attribs.has_key('id'):
|
if self.opts.keep_links and attribs.has_key('id'):
|
||||||
if tag not in ('body', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p'):
|
if tag not in ('body', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p'):
|
||||||
text.append('(#' + attribs['id'] + ')')
|
if tag == 'span':
|
||||||
|
text.append(' %')
|
||||||
|
tags.append('% ')
|
||||||
|
text.append('(#' + attribs['id'] + u')\xa0')
|
||||||
|
|
||||||
# If wanted process all style tags here - before taxt in tags is written
|
# If wanted process all style tags here - before taxt in tags is written
|
||||||
|
|
||||||
@ -318,11 +353,19 @@ class TextileMLizer(OEB2HTML):
|
|||||||
if self.list: self.list.pop()
|
if self.list: self.list.pop()
|
||||||
else:
|
else:
|
||||||
text.append('%s' % t)
|
text.append('%s' % t)
|
||||||
if t == '*': self.style_bold = False
|
if t == '*':
|
||||||
if t == '_': self.style_italic = False
|
self.style_bold = False
|
||||||
if t == '+': self.style_under = False
|
if t == '_':
|
||||||
if t == '-': self.style_strike = False
|
self.style_italic = False
|
||||||
if t == '&': self.style_smallcap = False
|
if t == '+':
|
||||||
|
self.style_under = False
|
||||||
|
if t == '-':
|
||||||
|
self.style_strike = False
|
||||||
|
if t == '&':
|
||||||
|
self.style_smallcap = False
|
||||||
|
|
||||||
|
# Soft scene breaks.
|
||||||
|
text.append(self.check_padding(style, ['margin-bottom',u'\n\n\xa0']))
|
||||||
|
|
||||||
# Add the text that is outside of the tag.
|
# Add the text that is outside of the tag.
|
||||||
if hasattr(elem, 'tail') and elem.tail:
|
if hasattr(elem, 'tail') and elem.tail:
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
'''
|
"""unsmarten : html2textile helper function"""
|
||||||
|
|
||||||
'''
|
|
||||||
|
|
||||||
__version__ = '0.1'
|
__version__ = '0.1'
|
||||||
__author__ = 'Leigh Parry'
|
__author__ = 'Leigh Parry'
|
||||||
@ -102,8 +100,9 @@ def unsmarten(txt):
|
|||||||
txt = re.sub(u'&#9829;|&hearts;|♥', r'{heart}', txt) # heart
|
txt = re.sub(u'&#9829;|&hearts;|♥', r'{heart}', txt) # heart
|
||||||
txt = re.sub(u'&#9830;|&diams;|♦', r'{diamond}', txt) # diamond
|
txt = re.sub(u'&#9830;|&diams;|♦', r'{diamond}', txt) # diamond
|
||||||
|
|
||||||
txt = re.sub(u'\xa0', r'p. ', txt) # blank paragraph
|
# Move into main code?
|
||||||
txt = re.sub(u'\n\n\n\n', r'\n\np. \n\n', txt) # blank paragraph
|
# txt = re.sub(u'\xa0', r'p. ', txt) # blank paragraph
|
||||||
txt = re.sub(u'\n \n', r'\n<br />\n', txt) # blank paragraph - br tag
|
# txt = re.sub(u'\n\n\n\n', r'\n\np. \n\n', txt) # blank paragraph
|
||||||
|
# txt = re.sub(u'\n \n', r'\n<br />\n', txt) # blank paragraph - br tag
|
||||||
|
|
||||||
return txt
|
return txt
|
||||||
|
Loading…
x
Reference in New Issue
Block a user