mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
TXT Output: Add optional Textile markup
This commit is contained in:
commit
ae929cf12b
@ -8,7 +8,6 @@ import os
|
|||||||
|
|
||||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||||
OptionRecommendation
|
OptionRecommendation
|
||||||
from calibre.ebooks.txt.markdownml import MarkdownMLizer
|
|
||||||
from calibre.ebooks.txt.txtml import TXTMLizer
|
from calibre.ebooks.txt.txtml import TXTMLizer
|
||||||
from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
|
from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
|
||||||
|
|
||||||
@ -44,24 +43,32 @@ class TXTOutput(OutputFormatPlugin):
|
|||||||
recommended_value=False, level=OptionRecommendation.LOW,
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
help=_('Force splitting on the max-line-length value when no space '
|
help=_('Force splitting on the max-line-length value when no space '
|
||||||
'is present. Also allows max-line-length to be below the minimum')),
|
'is present. Also allows max-line-length to be below the minimum')),
|
||||||
OptionRecommendation(name='markdown_format',
|
OptionRecommendation(name='txt_output_formatting',
|
||||||
recommended_value=False, level=OptionRecommendation.LOW,
|
recommended_value='plain',
|
||||||
help=_('Produce Markdown formatted text.')),
|
choices=['plain', 'markdown', 'textile'],
|
||||||
|
help=_('Formatting used within the document.\n'
|
||||||
|
'* plain: Produce plain text.\n'
|
||||||
|
'* markdown: Produce Markdown formatted text.\n'
|
||||||
|
'* textile: Produce Textile formatted text.')),
|
||||||
OptionRecommendation(name='keep_links',
|
OptionRecommendation(name='keep_links',
|
||||||
recommended_value=False, level=OptionRecommendation.LOW,
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
help=_('Do not remove links within the document. This is only ' \
|
help=_('Do not remove links within the document. This is only ' \
|
||||||
'useful when paired with the markdown-format option because' \
|
'useful when paired with a txt-output-formatting option that '
|
||||||
' links are always removed with plain text output.')),
|
'is not none because links are always removed with plain text output.')),
|
||||||
OptionRecommendation(name='keep_image_references',
|
OptionRecommendation(name='keep_image_references',
|
||||||
recommended_value=False, level=OptionRecommendation.LOW,
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
help=_('Do not remove image references within the document. This is only ' \
|
help=_('Do not remove image references within the document. This is only ' \
|
||||||
'useful when paired with the markdown-format option because' \
|
'useful when paired with a txt-output-formatting option that '
|
||||||
' image references are always removed with plain text output.')),
|
'is not none because links are always removed with plain text output.')),
|
||||||
])
|
])
|
||||||
|
|
||||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
if opts.markdown_format:
|
if opts.txt_output_formatting.lower() == 'markdown':
|
||||||
|
from calibre.ebooks.txt.markdownml import MarkdownMLizer
|
||||||
writer = MarkdownMLizer(log)
|
writer = MarkdownMLizer(log)
|
||||||
|
elif opts.txt_output_formatting.lower() == 'textile':
|
||||||
|
from calibre.ebooks.txt.textileml import TextileMLizer
|
||||||
|
writer = TextileMLizer(log)
|
||||||
else:
|
else:
|
||||||
writer = TXTMLizer(log)
|
writer = TXTMLizer(log)
|
||||||
|
|
||||||
|
64
src/calibre/ebooks/txt/textileml.py
Normal file
64
src/calibre/ebooks/txt/textileml.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Transform OEB content into Textile formatted plain text
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.base import XHTML
|
||||||
|
from calibre.utils.html2textile import html2textile
|
||||||
|
|
||||||
|
class TextileMLizer(object):
    '''
    Transform the XHTML content of an OEB book into Textile formatted text.

    The <body> of every spine item is serialized to a string, optionally
    stripped of links and image references, and converted with
    html2textile. The converted sections are concatenated, each one
    terminated by a blank line.
    '''

    def __init__(self, log):
        # log: object providing info() and debug(), used for progress output.
        self.log = log

    def extract_content(self, oeb_book, opts):
        '''
        Convert all spine items of oeb_book and return the combined text.

        :param oeb_book: Book whose ``spine`` is iterated; every item must
                         provide ``href`` and ``data``.
        :param opts: Options object providing the boolean attributes
                     ``keep_links`` and ``keep_image_references``.
        :return: Unicode string with the Textile text of the whole book.
        '''
        self.log.info('Converting XHTML to Textile formatted TXT...')
        self.oeb_book = oeb_book
        self.opts = opts

        return self.mlize_spine()

    def mlize_spine(self):
        '''
        Convert each spine item in order and join the results.

        :return: Unicode string made of the converted sections.
        '''
        output = []

        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to Textile formatted TXT...' % item.href)

            html = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))

            if not self.opts.keep_links:
                html = self._strip_tags(html, 'a')
            if not self.opts.keep_image_references:
                html = self._strip_tags(html, 'img')

            # Append the section as one string. Using ``+=`` on the list
            # would extend it character by character.
            output.append(self._end_section(html2textile(html)))

        return u''.join(output)

    @staticmethod
    def _strip_tags(html, name):
        '''
        Remove the opening, self-closing and closing tags called name from
        html while keeping whatever is between them.

        The lookahead requires whitespace, ``/`` or ``>`` directly after the
        tag name so that removing ``a`` leaves ``<abbr>`` or ``<address>``
        untouched.
        '''
        tag = re.escape(name)
        html = re.sub(r'<\s*%s(?=[\s/>])[^>]*>' % tag, '', html)
        return re.sub(r'<\s*/\s*%s\s*>' % tag, '', html)

    @staticmethod
    def _end_section(text):
        '''
        Return text terminated by at least two new line characters.

        This prevents the last paragraph of a section from being combined
        into the first paragraph of the next one. Text that is empty or a
        single character long is handled as well.
        '''
        # Only the tail matters; look at it with all newlines converted
        # to \n.
        tail = text[-4:].replace('\r\n', '\n').replace('\r', '\n')
        if not tail.endswith('\n'):
            return text + '\n\n'
        if not tail.endswith('\n\n'):
            return text + '\n'
        return text
|
@ -4,7 +4,6 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from PyQt4.Qt import Qt
|
|
||||||
|
|
||||||
from calibre.gui2.convert.txt_output_ui import Ui_Form
|
from calibre.gui2.convert.txt_output_ui import Ui_Form
|
||||||
from calibre.gui2.convert import Widget
|
from calibre.gui2.convert import Widget
|
||||||
@ -21,26 +20,14 @@ class PluginWidget(Widget, Ui_Form):
|
|||||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||||
Widget.__init__(self, parent,
|
Widget.__init__(self, parent,
|
||||||
['newline', 'max_line_length', 'force_max_line_length',
|
['newline', 'max_line_length', 'force_max_line_length',
|
||||||
'inline_toc', 'markdown_format', 'keep_links', 'keep_image_references',
|
'inline_toc', 'txt_output_formatting', 'keep_links', 'keep_image_references',
|
||||||
'txt_output_encoding'])
|
'txt_output_encoding'])
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
for x in get_option('newline').option.choices:
|
for x in get_option('newline').option.choices:
|
||||||
self.opt_newline.addItem(x)
|
self.opt_newline.addItem(x)
|
||||||
|
for x in get_option('txt_output_formatting').option.choices:
|
||||||
|
self.opt_txt_output_formatting.addItem(x)
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
|
||||||
self.opt_markdown_format.stateChanged.connect(self.enable_markdown_format)
|
|
||||||
self.enable_markdown_format(self.opt_markdown_format.checkState())
|
|
||||||
|
|
||||||
def break_cycles(self):
|
def break_cycles(self):
|
||||||
Widget.break_cycles(self)
|
Widget.break_cycles(self)
|
||||||
|
|
||||||
try:
|
|
||||||
self.opt_markdown_format.stateChanged.disconnect()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def enable_markdown_format(self, state):
|
|
||||||
state = state == Qt.Checked
|
|
||||||
self.opt_keep_links.setEnabled(state)
|
|
||||||
self.opt_keep_image_references.setEnabled(state)
|
|
||||||
|
|
||||||
|
@ -6,15 +6,38 @@
|
|||||||
<rect>
|
<rect>
|
||||||
<x>0</x>
|
<x>0</x>
|
||||||
<y>0</y>
|
<y>0</y>
|
||||||
<width>477</width>
|
<width>392</width>
|
||||||
<height>300</height>
|
<height>346</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
<property name="windowTitle">
|
<property name="windowTitle">
|
||||||
<string>Form</string>
|
<string>Form</string>
|
||||||
</property>
|
</property>
|
||||||
|
<layout class="QVBoxLayout" name="verticalLayout_2">
|
||||||
|
<item>
|
||||||
|
<widget class="QGroupBox" name="groupBox">
|
||||||
|
<property name="title">
|
||||||
|
<string>General</string>
|
||||||
|
</property>
|
||||||
<layout class="QGridLayout" name="gridLayout">
|
<layout class="QGridLayout" name="gridLayout">
|
||||||
<item row="0" column="0">
|
<item row="0" column="0">
|
||||||
|
<widget class="QLabel" name="label_3">
|
||||||
|
<property name="text">
|
||||||
|
<string>Output &Encoding:</string>
|
||||||
|
</property>
|
||||||
|
<property name="buddy">
|
||||||
|
<cstring>opt_txt_output_encoding</cstring>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="0" column="1">
|
||||||
|
<widget class="EncodingComboBox" name="opt_txt_output_encoding">
|
||||||
|
<property name="editable">
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="1" column="0">
|
||||||
<widget class="QLabel" name="label">
|
<widget class="QLabel" name="label">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>&Line ending style:</string>
|
<string>&Line ending style:</string>
|
||||||
@ -24,32 +47,31 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="0" column="1">
|
<item row="1" column="1">
|
||||||
<widget class="QComboBox" name="opt_newline"/>
|
<widget class="QComboBox" name="opt_newline"/>
|
||||||
</item>
|
</item>
|
||||||
<item row="8" column="0">
|
<item row="2" column="0">
|
||||||
<spacer name="verticalSpacer">
|
<widget class="QLabel" name="label_4">
|
||||||
<property name="orientation">
|
|
||||||
<enum>Qt::Vertical</enum>
|
|
||||||
</property>
|
|
||||||
<property name="sizeHint" stdset="0">
|
|
||||||
<size>
|
|
||||||
<width>20</width>
|
|
||||||
<height>246</height>
|
|
||||||
</size>
|
|
||||||
</property>
|
|
||||||
</spacer>
|
|
||||||
</item>
|
|
||||||
<item row="4" column="0" colspan="2">
|
|
||||||
<widget class="QCheckBox" name="opt_inline_toc">
|
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>&Inline TOC</string>
|
<string>&Formatting:</string>
|
||||||
|
</property>
|
||||||
|
<property name="buddy">
|
||||||
|
<cstring>opt_txt_output_formatting</cstring>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="1" column="1">
|
<item row="2" column="1">
|
||||||
<widget class="QSpinBox" name="opt_max_line_length"/>
|
<widget class="QComboBox" name="opt_txt_output_formatting"/>
|
||||||
</item>
|
</item>
|
||||||
|
</layout>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QGroupBox" name="groupBox_2">
|
||||||
|
<property name="title">
|
||||||
|
<string>Plain</string>
|
||||||
|
</property>
|
||||||
|
<layout class="QGridLayout" name="gridLayout_2">
|
||||||
<item row="1" column="0">
|
<item row="1" column="0">
|
||||||
<widget class="QLabel" name="label_2">
|
<widget class="QLabel" name="label_2">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
@ -60,46 +82,47 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="3" column="0" colspan="2">
|
<item row="1" column="1">
|
||||||
|
<widget class="QSpinBox" name="opt_max_line_length"/>
|
||||||
|
</item>
|
||||||
|
<item row="2" column="0" colspan="2">
|
||||||
<widget class="QCheckBox" name="opt_force_max_line_length">
|
<widget class="QCheckBox" name="opt_force_max_line_length">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Force maximum line length</string>
|
<string>Force maximum line length</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="5" column="0">
|
<item row="0" column="0">
|
||||||
<widget class="QCheckBox" name="opt_markdown_format">
|
<widget class="QCheckBox" name="opt_inline_toc">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Apply Markdown formatting to text</string>
|
<string>&Inline TOC</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="6" column="0">
|
</layout>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QGroupBox" name="groupBox_3">
|
||||||
|
<property name="title">
|
||||||
|
<string>Markdown, Textile</string>
|
||||||
|
</property>
|
||||||
|
<layout class="QVBoxLayout" name="verticalLayout">
|
||||||
|
<item>
|
||||||
<widget class="QCheckBox" name="opt_keep_links">
|
<widget class="QCheckBox" name="opt_keep_links">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Do not remove links (<a> tags) before processing</string>
|
<string>Do not remove links (<a> tags) before processing</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="7" column="0">
|
<item>
|
||||||
<widget class="QCheckBox" name="opt_keep_image_references">
|
<widget class="QCheckBox" name="opt_keep_image_references">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Do not remove image references before processing</string>
|
<string>Do not remove image references before processing</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="2" column="0">
|
</layout>
|
||||||
<widget class="QLabel" name="label_3">
|
|
||||||
<property name="text">
|
|
||||||
<string>Output Encoding:</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="2" column="1">
|
|
||||||
<widget class="EncodingComboBox" name="opt_txt_output_encoding">
|
|
||||||
<property name="editable">
|
|
||||||
<bool>true</bool>
|
|
||||||
</property>
|
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
|
209
src/calibre/utils/html2textile.py
Normal file
209
src/calibre/utils/html2textile.py
Normal file
@ -0,0 +1,209 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright (c) 2010, Webreactor - Marcin Lulek <info@webreactor.eu>
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in the
|
||||||
|
# documentation and/or other materials provided with the distribution.
|
||||||
|
# * Neither the name of the <organization> nor the
|
||||||
|
# names of its contributors may be used to endorse or promote products
|
||||||
|
# derived from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
# DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
|
||||||
|
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
from calibre.ebooks.oeb.base import barename
|
||||||
|
|
||||||
|
class EchoTarget(object):
    '''
    Parser target that turns start/end/data events into Textile markup.

    Generated fragments are collected in ``final_output``. While a link is
    open (``block`` is True) fragments are buffered in ``haystack`` instead
    and written out as the link text when the link is closed.
    '''

    # Heading tags; the tag name itself is used as the Textile signature.
    HEADINGS = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')

    # Inline tags and the Textile marker placed before and after their
    # content.
    INLINE_MARKERS = {
        'b': '*',
        'strong': '*',
        'em': '_',
        'i': '_',
        'cite': '??',
        'del': '-',
        'ins': '+',
        'sup': '^',
        'sub': '~',
        'span': '%',
    }

    def __init__(self):
        self.final_output = []  # finished Textile fragments
        self.block = False      # True while inside an <a> element
        self.ol_ident = 0       # number of currently open <ol> elements
        self.ul_ident = 0       # number of currently open <ul> elements
        self.list_types = []    # stack of open list tags ('ul' / 'ol')
        self.haystack = []      # fragments buffered as link text

    def _emit(self, text):
        '''
        Store text in the link buffer while a link is open, otherwise in
        the final output. Used for opening markers, closing markers and
        character data alike so they always end up in the same place.
        '''
        if self.block:
            self.haystack.append(text)
        else:
            self.final_output.append(text)

    def start(self, tag, attrib):
        '''
        Handle an opening tag.

        :param tag: Tag name; reduced with barename() before use.
        :param attrib: Mapping of the element's attributes.
        '''
        tag = barename(tag)

        newline = '\n'
        dot = ''
        new_tag = ''

        if tag in self.HEADINGS:
            new_tag = tag
            dot = '. '
        elif tag == 'blockquote':
            new_tag = 'bq'
            dot = '. '
        elif tag in self.INLINE_MARKERS:
            new_tag = self.INLINE_MARKERS[tag]
            newline = ''
        elif tag == 'a':
            self.block = True
            self.a_part = {'title': attrib.get('title'),
                           'href': attrib.get('href', '')}
            newline = ''
        elif tag == 'img':
            # Use the title when there is one and fall back to the alt
            # text. Print the parenthesised part only when one of them is
            # non-empty so a literal "(None)" is never produced.
            label = attrib.get('title') or attrib.get('alt')
            if label:
                new_tag = ' !%s(%s)' % (attrib.get('src', ''), label)
            else:
                new_tag = ' !%s' % attrib.get('src', '')
            newline = ''
        elif tag in ('ul', 'ol'):
            # Lists produce no text of their own, they only change how
            # the contained items are marked.
            self.list_types.append(tag)
            if tag == 'ul':
                self.ul_ident += 1
            else:
                self.ol_ident += 1
            return
        elif tag == 'li':
            # An item found outside of any list is treated as a first
            # level bullet instead of failing on the empty stack.
            depth = max(self.ul_ident + self.ol_ident, 1)
            ordered = bool(self.list_types) and self.list_types[-1] == 'ol'
            new_tag = ('#' if ordered else '*') * depth + ' '
        # Every other tag, 'p' included, only starts a new line.

        self._emit('%(newline)s%(tag)s%(dot)s' %
                {
                    'newline': newline,
                    'tag': new_tag,
                    'dot': dot
                })

    def end(self, tag):
        '''
        Handle a closing tag.

        :param tag: Tag name; reduced with barename() before use.
        '''
        tag = barename(tag)

        if tag == 'p' or tag in self.HEADINGS:
            self._emit('\n')
        elif tag in self.INLINE_MARKERS:
            self._emit(self.INLINE_MARKERS[tag])
        elif tag == 'a':
            link_text = ''.join(self.haystack)
            self.haystack = []
            # Leave link mode first, the finished link belongs to the
            # final output.
            self.block = False
            if self.a_part['title']:
                textilized = ' "%s (%s)":%s ' % (
                    link_text,
                    self.a_part.get('title'),
                    self.a_part.get('href'),
                )
            else:
                textilized = ' "%s":%s ' % (
                    link_text,
                    self.a_part.get('href'),
                )
            self.final_output.append(textilized)
        elif tag == 'img':
            self._emit('!')
        elif tag in ('ul', 'ol'):
            if tag == 'ul':
                self.ul_ident -= 1
            else:
                self.ol_ident -= 1
            self.list_types.pop()
            # Finish the list with a new line once the outermost list is
            # closed.
            if not self.list_types:
                self._emit('\n')

    def data(self, data):
        '''
        Handle character data.

        :param data: Text found between tags.
        '''
        # we dont want any linebreaks inside our tags
        self._emit(data.replace('\n', ''))

    def comment(self, text):
        # Comments are not carried over into the output.
        pass

    def close(self):
        # The value returned here is what the parser hands back once
        # parsing is finished.
        return "closed!"
|
||||||
|
|
||||||
|
|
||||||
|
def html2textile(html):
    '''
    Convert a string of HTML into Textile markup.

    :param html: Markup to convert; parsed with lxml's HTMLParser.
    :return: The Textile text without leading or trailing whitespace.
    '''
    # Pass 1: read the input with the HTML parser, write it back out as
    # XML and reparse that with remove_blank_text=True to clean up the
    # whitespace.
    html_tree = etree.fromstring(html, etree.HTMLParser())
    as_xml = etree.tostring(html_tree, method="xml")
    compact_root = etree.XML(as_xml, etree.XMLParser(remove_blank_text=True))
    normalized = etree.tostring(compact_root)

    # Pass 2: feed the cleaned markup through a parser whose target
    # collects the Textile fragments. The value returned by the parse is
    # not needed.
    collector = EchoTarget()
    etree.fromstring(normalized, etree.XMLParser(target=collector))

    return ''.join(collector.final_output).strip()
|
Loading…
x
Reference in New Issue
Block a user