mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
TXT Input: Attempt to detect the input encoding when not specified. Auto detect paragraph structure and formatting markup. FB2 Output: Insert covers. Fixes #8172 (another fb2 problem in 0.7.37)
This commit is contained in:
commit
91ba0d2df4
@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class NewJournalOfPhysics(BasicNewsRecipe):
|
class NewJournalOfPhysics(BasicNewsRecipe):
|
||||||
title = u'New Journal of Physics'
|
title = u'New Journal of Physics'
|
||||||
__author__ = u'Chema Cortés'
|
__author__ = u'Chema Cort\xe9s'
|
||||||
description = u'The open-access journal for physics'
|
description = u'The open-access journal for physics'
|
||||||
publisher = u'IOP (Institute of Physics)'
|
publisher = u'IOP (Institute of Physics)'
|
||||||
category = 'physics, journal, science'
|
category = 'physics, journal, science'
|
||||||
|
@ -16,6 +16,7 @@ import uuid
|
|||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre import guess_type
|
||||||
from calibre import prepare_string_for_xml
|
from calibre import prepare_string_for_xml
|
||||||
from calibre.constants import __appname__, __version__
|
from calibre.constants import __appname__, __version__
|
||||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||||
@ -161,6 +162,23 @@ class FB2MLizer(object):
|
|||||||
text.append('<section>')
|
text.append('<section>')
|
||||||
self.section_level += 1
|
self.section_level += 1
|
||||||
|
|
||||||
|
# Insert the title page / cover into the spine if it is not already referenced.
|
||||||
|
title_name = u''
|
||||||
|
if 'titlepage' in self.oeb_book.guide:
|
||||||
|
title_name = 'titlepage'
|
||||||
|
elif 'cover' in self.oeb_book.guide:
|
||||||
|
title_name = 'cover'
|
||||||
|
if title_name:
|
||||||
|
title_item = self.oeb_book.manifest.hrefs[self.oeb_book.guide[title_name].href]
|
||||||
|
if title_item.spine_position is None and title_item.media_type == 'application/xhtml+xml':
|
||||||
|
self.oeb_book.spine.insert(0, title_item, True)
|
||||||
|
# Create xhtml page to reference cover image so it can be used.
|
||||||
|
if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
|
||||||
|
id = unicode(self.oeb_book.metadata.cover[0])
|
||||||
|
cover_item = self.oeb_book.manifest.ids[id]
|
||||||
|
if cover_item.media_type in OEB_RASTER_IMAGES:
|
||||||
|
self.insert_image_cover(cover_item.href)
|
||||||
|
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
|
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
@ -185,6 +203,17 @@ class FB2MLizer(object):
|
|||||||
|
|
||||||
return ''.join(text) + '</body>'
|
return ''.join(text) + '</body>'
|
||||||
|
|
||||||
|
def insert_image_cover(self, image_href):
    '''
    Create an XHTML page that only references the cover image and make it
    the first item of the spine.

    The page is registered in the manifest under an id/href generated from
    'fb2_cover' / 'fb2_cover.xhtml' and inserted at spine position 0.

    :param image_href: href of the cover image to reference from the page.
    '''
    from xml.sax.saxutils import quoteattr
    from calibre.ebooks.oeb.base import RECOVER_PARSER

    try:
        # quoteattr escapes &, < and > and supplies the surrounding quotes,
        # so an href containing markup characters cannot break the page.
        markup = u'<html xmlns="%s"><body><img src=%s /></body></html>' % (
            XHTML_NS, quoteattr(image_href))
        root = etree.fromstring(markup, parser=RECOVER_PARSER)
    except Exception:
        # NOTE(review): fallback kept from the original code; confirm what
        # parsing an empty string with RECOVER_PARSER actually yields.
        root = etree.fromstring(u'', parser=RECOVER_PARSER)

    # Local names chosen so the builtin id() is not shadowed.
    cover_id, cover_href = self.oeb_book.manifest.generate('fb2_cover', 'fb2_cover.xhtml')
    item = self.oeb_book.manifest.add(cover_id, cover_href, guess_type(cover_href)[0], data=root)
    self.oeb_book.spine.insert(0, item, True)
|
||||||
|
|
||||||
def fb2mlize_images(self):
|
def fb2mlize_images(self):
|
||||||
'''
|
'''
|
||||||
This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
|
This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
|
||||||
|
@ -19,16 +19,27 @@ class PDBInput(InputFormatPlugin):
|
|||||||
file_types = set(['pdb'])
|
file_types = set(['pdb'])
|
||||||
|
|
||||||
options = set([
|
options = set([
|
||||||
OptionRecommendation(name='single_line_paras', recommended_value=False,
|
OptionRecommendation(name='paragraph_type', recommended_value='auto',
|
||||||
help=_('Normally calibre treats blank lines as paragraph markers. '
|
choices=['auto', 'block', 'single', 'print'],
|
||||||
'With this option it will assume that every line represents '
|
help=_('Paragraph structure.\n'
|
||||||
'a paragraph instead.')),
|
'choices are [\'auto\', \'block\', \'single\', \'print\']\n'
|
||||||
OptionRecommendation(name='print_formatted_paras', recommended_value=False,
|
'* auto: Try to auto detect paragraph type.\n'
|
||||||
help=_('Normally calibre treats blank lines as paragraph markers. '
|
'* block: Treat a blank line as a paragraph break.\n'
|
||||||
'With this option it will assume that every line starting with '
|
'* single: Assume every line is a paragraph.\n'
|
||||||
'an indent (either a tab or 2+ spaces) represents a paragraph. '
|
'* print: Assume every line starting with 2+ spaces or a tab '
|
||||||
'Paragraphs end when the next line that starts with an indent '
|
'starts a paragraph.')),
|
||||||
'is reached.')),
|
OptionRecommendation(name='formatting_type', recommended_value='auto',
    choices=['auto', 'none', 'markdown'],
    # The summary sentence ends with '\n' so the first bullet starts on
    # its own line, like the remaining entries.
    help=_('Formatting used within the document.\n'
           '* auto: Try to auto detect the document formatting.\n'
           '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
           '* markdown: Run the input through the markdown pre-processor. '
           'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
|
||||||
|
OptionRecommendation(name='preserve_spaces', recommended_value=False,
|
||||||
|
help=_('Normally extra spaces are condensed into a single space. '
|
||||||
|
'With this option all spaces will be displayed.')),
|
||||||
|
OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
|
||||||
|
help=_('Do not insert a Table of Contents into the output text.')),
|
||||||
])
|
])
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
|
@ -22,7 +22,7 @@ class PDBOutput(OutputFormatPlugin):
|
|||||||
short_switch='f', choices=FORMAT_WRITERS.keys(),
|
short_switch='f', choices=FORMAT_WRITERS.keys(),
|
||||||
help=(_('Format to use inside the pdb container. Choices are:')+\
|
help=(_('Format to use inside the pdb container. Choices are:')+\
|
||||||
' %s' % FORMAT_WRITERS.keys())),
|
' %s' % FORMAT_WRITERS.keys())),
|
||||||
OptionRecommendation(name='output_encoding', recommended_value='cp1252',
|
OptionRecommendation(name='pdb_output_encoding', recommended_value='cp1252',
|
||||||
level=OptionRecommendation.LOW,
|
level=OptionRecommendation.LOW,
|
||||||
help=_('Specify the character encoding of the output document. ' \
|
help=_('Specify the character encoding of the output document. ' \
|
||||||
'The default is cp1252. Note: This option is not honored by all ' \
|
'The default is cp1252. Note: This option is not honored by all ' \
|
||||||
|
@ -8,12 +8,11 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os
|
|
||||||
import struct
|
import struct
|
||||||
|
|
||||||
|
from cStringIO import StringIO
|
||||||
|
|
||||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
|
|
||||||
separate_paragraphs_single_line, separate_paragraphs_print_formatted
|
|
||||||
|
|
||||||
class HeaderRecord(object):
|
class HeaderRecord(object):
|
||||||
'''
|
'''
|
||||||
@ -33,9 +32,7 @@ class Reader(FormatReader):
|
|||||||
def __init__(self, header, stream, log, options):
|
def __init__(self, header, stream, log, options):
|
||||||
self.stream = stream
|
self.stream = stream
|
||||||
self.log = log
|
self.log = log
|
||||||
self.encoding = options.input_encoding
|
self.options = options
|
||||||
self.single_line_paras = options.single_line_paras
|
|
||||||
self.print_formatted_paras = options.print_formatted_paras
|
|
||||||
|
|
||||||
self.sections = []
|
self.sections = []
|
||||||
for i in range(header.num_sections):
|
for i in range(header.num_sections):
|
||||||
@ -48,34 +45,29 @@ class Reader(FormatReader):
|
|||||||
|
|
||||||
def decompress_text(self, number):
|
def decompress_text(self, number):
|
||||||
if self.header_record.compression == 1:
|
if self.header_record.compression == 1:
|
||||||
return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding)
|
return self.section_data(number)
|
||||||
if self.header_record.compression == 2 or self.header_record.compression == 258:
|
if self.header_record.compression == 2 or self.header_record.compression == 258:
|
||||||
from calibre.ebooks.compression.palmdoc import decompress_doc
|
from calibre.ebooks.compression.palmdoc import decompress_doc
|
||||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
return decompress_doc(self.section_data(number))
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def extract_content(self, output_dir):
|
def extract_content(self, output_dir):
|
||||||
txt = ''
|
raw_txt = ''
|
||||||
|
|
||||||
self.log.info('Decompressing text...')
|
self.log.info('Decompressing text...')
|
||||||
for i in range(1, self.header_record.num_records + 1):
|
for i in range(1, self.header_record.num_records + 1):
|
||||||
self.log.debug('\tDecompressing text section %i' % i)
|
self.log.debug('\tDecompressing text section %i' % i)
|
||||||
txt += self.decompress_text(i)
|
raw_txt += self.decompress_text(i)
|
||||||
|
|
||||||
self.log.info('Converting text to OEB...')
|
self.log.info('Converting text to OEB...')
|
||||||
if self.single_line_paras:
|
stream = StringIO(raw_txt)
|
||||||
txt = separate_paragraphs_single_line(txt)
|
|
||||||
if self.print_formatted_paras:
|
|
||||||
txt = separate_paragraphs_print_formatted(txt)
|
|
||||||
html = convert_basic(txt)
|
|
||||||
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
|
||||||
index.write(html.encode('utf-8'))
|
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.customize.ui import plugin_for_input_format
|
||||||
mi = get_metadata(self.stream, 'pdb')
|
|
||||||
manifest = [('index.html', None)]
|
|
||||||
spine = ['index.html']
|
|
||||||
opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)
|
|
||||||
|
|
||||||
return os.path.join(output_dir, 'metadata.opf')
|
txt_plugin = plugin_for_input_format('txt')
|
||||||
|
# Give self.options every option the TXT input plugin declares, using the
# plugin's recommended value when the attribute is missing. The presence
# test and the assignment must go through the same attribute path
# (option.option.name); the original assigned through option.name.
for option in txt_plugin.options:
    if not hasattr(self.options, option.option.name):
        setattr(self.options, option.option.name, option.recommended_value)
|
||||||
|
|
||||||
|
stream.seek(0)
|
||||||
|
return txt_plugin.convert(stream, self.options, 'txt', self.log, {})
|
||||||
|
@ -50,7 +50,8 @@ class Writer(FormatWriter):
|
|||||||
txt = writer.extract_content(oeb_book, self.opts)
|
txt = writer.extract_content(oeb_book, self.opts)
|
||||||
|
|
||||||
self.log.debug('\tReplacing newlines with selected type...')
|
self.log.debug('\tReplacing newlines with selected type...')
|
||||||
txt = specified_newlines(TxtNewlines('windows').newline, txt).encode(self.opts.output_encoding, 'replace')
|
txt = specified_newlines(TxtNewlines('windows').newline,
|
||||||
|
txt).encode(self.opts.pdb_output_encoding, 'replace')
|
||||||
|
|
||||||
txt_length = len(txt)
|
txt_length = len(txt)
|
||||||
|
|
||||||
|
@ -8,12 +8,13 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, struct, zlib
|
import struct
|
||||||
|
import zlib
|
||||||
|
|
||||||
|
from cStringIO import StringIO
|
||||||
|
|
||||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
from calibre.ebooks.pdb.ztxt import zTXTError
|
from calibre.ebooks.pdb.ztxt import zTXTError
|
||||||
from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
|
|
||||||
separate_paragraphs_single_line, separate_paragraphs_print_formatted
|
|
||||||
|
|
||||||
SUPPORTED_VERSION = (1, 40)
|
SUPPORTED_VERSION = (1, 40)
|
||||||
|
|
||||||
@ -38,9 +39,7 @@ class Reader(FormatReader):
|
|||||||
def __init__(self, header, stream, log, options):
|
def __init__(self, header, stream, log, options):
|
||||||
self.stream = stream
|
self.stream = stream
|
||||||
self.log = log
|
self.log = log
|
||||||
self.encoding = options.input_encoding
|
self.options = options
|
||||||
self.single_line_paras = options.single_line_paras
|
|
||||||
self.print_formatted_paras = options.print_formatted_paras
|
|
||||||
|
|
||||||
self.sections = []
|
self.sections = []
|
||||||
for i in range(header.num_sections):
|
for i in range(header.num_sections):
|
||||||
@ -68,30 +67,25 @@ class Reader(FormatReader):
|
|||||||
def decompress_text(self, number):
|
def decompress_text(self, number):
|
||||||
if number == 1:
|
if number == 1:
|
||||||
self.uncompressor = zlib.decompressobj()
|
self.uncompressor = zlib.decompressobj()
|
||||||
return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
return self.uncompressor.decompress(self.section_data(number))
|
||||||
|
|
||||||
def extract_content(self, output_dir):
|
def extract_content(self, output_dir):
|
||||||
txt = ''
|
raw_txt = ''
|
||||||
|
|
||||||
self.log.info('Decompressing text...')
|
self.log.info('Decompressing text...')
|
||||||
for i in range(1, self.header_record.num_records + 1):
|
for i in range(1, self.header_record.num_records + 1):
|
||||||
self.log.debug('\tDecompressing text section %i' % i)
|
self.log.debug('\tDecompressing text section %i' % i)
|
||||||
txt += self.decompress_text(i)
|
raw_txt += self.decompress_text(i)
|
||||||
|
|
||||||
self.log.info('Converting text to OEB...')
|
self.log.info('Converting text to OEB...')
|
||||||
if self.single_line_paras:
|
stream = StringIO(raw_txt)
|
||||||
txt = separate_paragraphs_single_line(txt)
|
|
||||||
if self.print_formatted_paras:
|
|
||||||
txt = separate_paragraphs_print_formatted(txt)
|
|
||||||
html = convert_basic(txt)
|
|
||||||
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
|
||||||
index.write(html.encode('utf-8'))
|
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.customize.ui import plugin_for_input_format
|
||||||
mi = get_metadata(self.stream, 'pdb')
|
|
||||||
manifest = [('index.html', None)]
|
|
||||||
spine = ['index.html']
|
|
||||||
opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)
|
|
||||||
|
|
||||||
return os.path.join(output_dir, 'metadata.opf')
|
txt_plugin = plugin_for_input_format('txt')
|
||||||
|
# Give self.options every option the TXT input plugin declares, using the
# plugin's recommended value when the attribute is missing. The presence
# test and the assignment must go through the same attribute path
# (option.option.name); the original assigned through option.name.
for option in txt_plugin.options:
    if not hasattr(self.options, option.option.name):
        setattr(self.options, option.option.name, option.recommended_value)
|
||||||
|
|
||||||
|
stream.seek(0)
|
||||||
|
return txt_plugin.convert(stream, self.options, 'txt', self.log, {})
|
||||||
|
@ -22,12 +22,12 @@ class Writer(FormatWriter):
|
|||||||
def __init__(self, opts, log):
|
def __init__(self, opts, log):
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
self.log = log
|
self.log = log
|
||||||
|
|
||||||
def write_content(self, oeb_book, out_stream, metadata=None):
|
def write_content(self, oeb_book, out_stream, metadata=None):
|
||||||
title = self.opts.title if self.opts.title else oeb_book.metadata.title[0].value if oeb_book.metadata.title != [] else _('Unknown')
|
title = self.opts.title if self.opts.title else oeb_book.metadata.title[0].value if oeb_book.metadata.title != [] else _('Unknown')
|
||||||
|
|
||||||
txt_records, txt_length = self._generate_text(oeb_book)
|
txt_records, txt_length = self._generate_text(oeb_book)
|
||||||
|
|
||||||
crc32 = 0
|
crc32 = 0
|
||||||
section_lengths = []
|
section_lengths = []
|
||||||
compressor = zlib.compressobj(9)
|
compressor = zlib.compressobj(9)
|
||||||
@ -41,32 +41,33 @@ class Writer(FormatWriter):
|
|||||||
|
|
||||||
header_record = self._header_record(txt_length, len(txt_records), crc32)
|
header_record = self._header_record(txt_length, len(txt_records), crc32)
|
||||||
section_lengths.insert(0, len(header_record))
|
section_lengths.insert(0, len(header_record))
|
||||||
|
|
||||||
out_stream.seek(0)
|
out_stream.seek(0)
|
||||||
hb = PdbHeaderBuilder('zTXTGPlm', title)
|
hb = PdbHeaderBuilder('zTXTGPlm', title)
|
||||||
hb.build_header(section_lengths, out_stream)
|
hb.build_header(section_lengths, out_stream)
|
||||||
|
|
||||||
for record in [header_record]+txt_records:
|
for record in [header_record]+txt_records:
|
||||||
out_stream.write(record)
|
out_stream.write(record)
|
||||||
|
|
||||||
def _generate_text(self, oeb_book):
|
def _generate_text(self, oeb_book):
|
||||||
writer = TXTMLizer(self.log)
|
writer = TXTMLizer(self.log)
|
||||||
txt = writer.extract_content(oeb_book, self.opts)
|
txt = writer.extract_content(oeb_book, self.opts)
|
||||||
|
|
||||||
self.log.debug('\tReplacing newlines with selected type...')
|
self.log.debug('\tReplacing newlines with selected type...')
|
||||||
txt = specified_newlines(TxtNewlines('windows').newline, txt).encode(self.opts.output_encoding, 'replace')
|
txt = specified_newlines(TxtNewlines('windows').newline,
|
||||||
|
txt).encode(self.opts.pdb_output_encoding, 'replace')
|
||||||
|
|
||||||
txt_length = len(txt)
|
txt_length = len(txt)
|
||||||
|
|
||||||
txt_records = []
|
txt_records = []
|
||||||
for i in range(0, (len(txt) / MAX_RECORD_SIZE) + 1):
|
for i in range(0, (len(txt) / MAX_RECORD_SIZE) + 1):
|
||||||
txt_records.append(txt[i * MAX_RECORD_SIZE : (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE])
|
txt_records.append(txt[i * MAX_RECORD_SIZE : (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE])
|
||||||
|
|
||||||
return txt_records, txt_length
|
return txt_records, txt_length
|
||||||
|
|
||||||
def _header_record(self, txt_length, record_count, crc32):
|
def _header_record(self, txt_length, record_count, crc32):
|
||||||
record = ''
|
record = ''
|
||||||
|
|
||||||
record += struct.pack('>H', 0x012c) # [0:2], version. 0x012c = 1.44
|
record += struct.pack('>H', 0x012c) # [0:2], version. 0x012c = 1.44
|
||||||
record += struct.pack('>H', record_count) # [2:4], Number of PDB records used for the text of the book.
|
record += struct.pack('>H', record_count) # [2:4], Number of PDB records used for the text of the book.
|
||||||
record += struct.pack('>L', txt_length) # [4:8], Uncompressed length of the entire text of the book.
|
record += struct.pack('>L', txt_length) # [4:8], Uncompressed length of the entire text of the book.
|
||||||
@ -79,6 +80,6 @@ class Writer(FormatWriter):
|
|||||||
record += struct.pack('>B', 0) # [19:20], Reserved.
|
record += struct.pack('>B', 0) # [19:20], Reserved.
|
||||||
record += struct.pack('>L', crc32) # [20:24], crc32
|
record += struct.pack('>L', crc32) # [20:24], crc32
|
||||||
record += struct.pack('>LL', 0, 0) # [24:32], padding
|
record += struct.pack('>LL', 0, 0) # [24:32], padding
|
||||||
|
|
||||||
return record
|
return record
|
||||||
|
|
||||||
|
@ -28,7 +28,7 @@ class PMLOutput(OutputFormatPlugin):
|
|||||||
file_type = 'pmlz'
|
file_type = 'pmlz'
|
||||||
|
|
||||||
options = set([
|
options = set([
|
||||||
OptionRecommendation(name='output_encoding', recommended_value='cp1252',
|
OptionRecommendation(name='pml_output_encoding', recommended_value='cp1252',
|
||||||
level=OptionRecommendation.LOW,
|
level=OptionRecommendation.LOW,
|
||||||
help=_('Specify the character encoding of the output document. ' \
|
help=_('Specify the character encoding of the output document. ' \
|
||||||
'The default is cp1252.')),
|
'The default is cp1252.')),
|
||||||
@ -48,7 +48,7 @@ class PMLOutput(OutputFormatPlugin):
|
|||||||
pmlmlizer = PMLMLizer(log)
|
pmlmlizer = PMLMLizer(log)
|
||||||
pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
|
pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
|
||||||
with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
|
with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
|
||||||
out.write(pml.encode(opts.output_encoding, 'replace'))
|
out.write(pml.encode(opts.pml_output_encoding, 'replace'))
|
||||||
|
|
||||||
self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, tdir, opts)
|
self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, tdir, opts)
|
||||||
|
|
||||||
|
@ -4,11 +4,9 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os
|
from cStringIO import StringIO
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
|
|
||||||
separate_paragraphs_single_line, separate_paragraphs_print_formatted
|
|
||||||
from calibre.ebooks.compression.tcr import decompress
|
from calibre.ebooks.compression.tcr import decompress
|
||||||
|
|
||||||
class TCRInput(InputFormatPlugin):
|
class TCRInput(InputFormatPlugin):
|
||||||
@ -19,36 +17,43 @@ class TCRInput(InputFormatPlugin):
|
|||||||
file_types = set(['tcr'])
|
file_types = set(['tcr'])
|
||||||
|
|
||||||
options = set([
|
options = set([
|
||||||
OptionRecommendation(name='single_line_paras', recommended_value=False,
|
OptionRecommendation(name='paragraph_type', recommended_value='auto',
|
||||||
help=_('Normally calibre treats blank lines as paragraph markers. '
|
choices=['auto', 'block', 'single', 'print'],
|
||||||
'With this option it will assume that every line represents '
|
help=_('Paragraph structure.\n'
|
||||||
'a paragraph instead.')),
|
'choices are [\'auto\', \'block\', \'single\', \'print\']\n'
|
||||||
OptionRecommendation(name='print_formatted_paras', recommended_value=False,
|
'* auto: Try to auto detect paragraph type.\n'
|
||||||
help=_('Normally calibre treats blank lines as paragraph markers. '
|
'* block: Treat a blank line as a paragraph break.\n'
|
||||||
'With this option it will assume that every line starting with '
|
'* single: Assume every line is a paragraph.\n'
|
||||||
'an indent (either a tab or 2+ spaces) represents a paragraph. '
|
'* print: Assume every line starting with 2+ spaces or a tab '
|
||||||
'Paragraphs end when the next line that starts with an indent '
|
'starts a paragraph.')),
|
||||||
'is reached.')),
|
OptionRecommendation(name='formatting_type', recommended_value='auto',
    choices=['auto', 'none', 'markdown'],
    # The summary sentence ends with '\n' so the first bullet starts on
    # its own line, like the remaining entries.
    help=_('Formatting used within the document.\n'
           '* auto: Try to auto detect the document formatting.\n'
           '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
           '* markdown: Run the input through the markdown pre-processor. '
           'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
|
||||||
|
OptionRecommendation(name='preserve_spaces', recommended_value=False,
|
||||||
|
help=_('Normally extra spaces are condensed into a single space. '
|
||||||
|
'With this option all spaces will be displayed.')),
|
||||||
|
OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
|
||||||
|
help=_('Do not insert a Table of Contents into the output text.')),
|
||||||
])
|
])
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log, accelerators):
|
def convert(self, stream, options, file_ext, log, accelerators):
|
||||||
log.info('Decompressing text...')
|
log.info('Decompressing text...')
|
||||||
ienc = options.input_encoding if options.input_encoding else 'utf-8'
|
raw_txt = decompress(stream)
|
||||||
txt = decompress(stream).decode(ienc, 'replace')
|
|
||||||
|
|
||||||
log.info('Converting text to OEB...')
|
log.info('Converting text to OEB...')
|
||||||
if options.single_line_paras:
|
stream = StringIO(raw_txt)
|
||||||
txt = separate_paragraphs_single_line(txt)
|
|
||||||
if options.print_formatted_paras:
|
|
||||||
txt = separate_paragraphs_print_formatted(txt)
|
|
||||||
html = convert_basic(txt)
|
|
||||||
with open(os.path.join(os.getcwd(), 'index.html'), 'wb') as index:
|
|
||||||
index.write(html.encode('utf-8'))
|
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.customize.ui import plugin_for_input_format
|
||||||
mi = get_metadata(stream, 'tcr')
|
|
||||||
manifest = [('index.html', None)]
|
|
||||||
spine = ['index.html']
|
|
||||||
opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
|
|
||||||
|
|
||||||
return os.path.join(os.getcwd(), 'metadata.opf')
|
txt_plugin = plugin_for_input_format('txt')
|
||||||
|
# Give options every option the TXT input plugin declares, using the
# plugin's recommended value when the attribute is missing. The presence
# test and the assignment must go through the same attribute path
# (option.option.name); the original assigned through option.name.
for option in txt_plugin.options:
    if not hasattr(options, option.option.name):
        setattr(options, option.option.name, option.recommended_value)
|
||||||
|
|
||||||
|
stream.seek(0)
|
||||||
|
return txt_plugin.convert(stream, options,
|
||||||
|
'txt', log, accelerators)
|
||||||
|
@ -18,7 +18,7 @@ class TCROutput(OutputFormatPlugin):
|
|||||||
file_type = 'tcr'
|
file_type = 'tcr'
|
||||||
|
|
||||||
options = set([
|
options = set([
|
||||||
OptionRecommendation(name='output_encoding', recommended_value='utf-8',
|
OptionRecommendation(name='tcr_output_encoding', recommended_value='utf-8',
|
||||||
level=OptionRecommendation.LOW,
|
level=OptionRecommendation.LOW,
|
||||||
help=_('Specify the character encoding of the output document. ' \
|
help=_('Specify the character encoding of the output document. ' \
|
||||||
'The default is utf-8.')),
|
'The default is utf-8.')),
|
||||||
@ -40,7 +40,7 @@ class TCROutput(OutputFormatPlugin):
|
|||||||
setattr(opts, 'indent_paras', False)
|
setattr(opts, 'indent_paras', False)
|
||||||
|
|
||||||
writer = TXTMLizer(log)
|
writer = TXTMLizer(log)
|
||||||
txt = writer.extract_content(oeb_book, opts).encode(opts.output_encoding, 'replace')
|
txt = writer.extract_content(oeb_book, opts).encode(opts.tcr_output_encoding, 'replace')
|
||||||
|
|
||||||
log.info('Compressing text...')
|
log.info('Compressing text...')
|
||||||
txt = compress(txt)
|
txt = compress(txt)
|
||||||
|
@ -7,9 +7,10 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
|
from calibre.ebooks.chardet import detect
|
||||||
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
||||||
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
|
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
|
||||||
preserve_spaces
|
preserve_spaces, detect_paragraph_type, detect_formatting_type
|
||||||
from calibre import _ent_pat, xml_entity_to_unicode
|
from calibre import _ent_pat, xml_entity_to_unicode
|
||||||
|
|
||||||
class TXTInput(InputFormatPlugin):
|
class TXTInput(InputFormatPlugin):
|
||||||
@ -20,45 +21,57 @@ class TXTInput(InputFormatPlugin):
|
|||||||
file_types = set(['txt'])
|
file_types = set(['txt'])
|
||||||
|
|
||||||
options = set([
|
options = set([
|
||||||
OptionRecommendation(name='single_line_paras', recommended_value=False,
|
OptionRecommendation(name='paragraph_type', recommended_value='auto',
|
||||||
help=_('Normally calibre treats blank lines as paragraph markers. '
|
choices=['auto', 'block', 'single', 'print'],
|
||||||
'With this option it will assume that every line represents '
|
help=_('Paragraph structure.\n'
|
||||||
'a paragraph instead.')),
|
'choices are [\'auto\', \'block\', \'single\', \'print\']\n'
|
||||||
OptionRecommendation(name='print_formatted_paras', recommended_value=False,
|
'* auto: Try to auto detect paragraph type.\n'
|
||||||
help=_('Normally calibre treats blank lines as paragraph markers. '
|
'* block: Treat a blank line as a paragraph break.\n'
|
||||||
'With this option it will assume that every line starting with '
|
'* single: Assume every line is a paragraph.\n'
|
||||||
'an indent (either a tab or 2+ spaces) represents a paragraph. '
|
'* print: Assume every line starting with 2+ spaces or a tab '
|
||||||
'Paragraphs end when the next line that starts with an indent '
|
'starts a paragraph.')),
|
||||||
'is reached.')),
|
OptionRecommendation(name='formatting_type', recommended_value='auto',
    choices=['auto', 'none', 'markdown'],
    # The summary sentence ends with '\n' so the first bullet starts on
    # its own line, like the remaining entries.
    help=_('Formatting used within the document.\n'
           '* auto: Try to auto detect the document formatting.\n'
           '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
           '* markdown: Run the input through the markdown pre-processor. '
           'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
|
||||||
OptionRecommendation(name='preserve_spaces', recommended_value=False,
|
OptionRecommendation(name='preserve_spaces', recommended_value=False,
|
||||||
help=_('Normally extra spaces are condensed into a single space. '
|
help=_('Normally extra spaces are condensed into a single space. '
|
||||||
'With this option all spaces will be displayed.')),
|
'With this option all spaces will be displayed.')),
|
||||||
OptionRecommendation(name='markdown', recommended_value=False,
|
|
||||||
help=_('Run the text input through the markdown pre-processor. To '
|
|
||||||
'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
|
|
||||||
OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
|
OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
|
||||||
help=_('Do not insert a Table of Contents into the output text.')),
|
help=_('Do not insert a Table of Contents into the output text.')),
|
||||||
])
|
])
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
ienc = stream.encoding if stream.encoding else 'utf-8'
|
log.debug('Reading text from file...')
|
||||||
|
|
||||||
|
txt = stream.read()
|
||||||
|
# Get the encoding of the document.
|
||||||
if options.input_encoding:
|
if options.input_encoding:
|
||||||
ienc = options.input_encoding
|
ienc = options.input_encoding
|
||||||
log.debug('Reading text from file...')
|
log.debug('Using user specified input encoding of %s' % ienc)
|
||||||
txt = stream.read().decode(ienc, 'replace')
|
else:
|
||||||
|
det_encoding = detect(txt)
|
||||||
# Adjust paragraph formatting as requested
|
ienc = det_encoding['encoding']
|
||||||
if options.single_line_paras:
|
log.debug('Detected input encoding as %s with a confidence of %s%%' % (ienc, det_encoding['confidence'] * 100))
|
||||||
txt = separate_paragraphs_single_line(txt)
|
if not ienc:
|
||||||
if options.print_formatted_paras:
|
ienc = 'utf-8'
|
||||||
txt = separate_paragraphs_print_formatted(txt)
|
log.debug('No input encoding specified and could not auto detect using %s' % ienc)
|
||||||
if options.preserve_spaces:
|
txt = txt.decode(ienc, 'replace')
|
||||||
txt = preserve_spaces(txt)
|
|
||||||
|
|
||||||
txt = _ent_pat.sub(xml_entity_to_unicode, txt)
|
txt = _ent_pat.sub(xml_entity_to_unicode, txt)
|
||||||
|
# Preserve spaces will replace multiple spaces to a space
|
||||||
|
# followed by the entity.
|
||||||
|
if options.preserve_spaces:
|
||||||
|
txt = preserve_spaces(txt)
|
||||||
|
|
||||||
|
if options.formatting_type == 'auto':
|
||||||
|
options.formatting_type = detect_formatting_type(txt)
|
||||||
|
|
||||||
if options.markdown:
|
if options.formatting_type == 'markdown':
|
||||||
log.debug('Running text though markdown conversion...')
|
log.debug('Running text though markdown conversion...')
|
||||||
try:
|
try:
|
||||||
html = convert_markdown(txt, disable_toc=options.markdown_disable_toc)
|
html = convert_markdown(txt, disable_toc=options.markdown_disable_toc)
|
||||||
@ -66,6 +79,22 @@ class TXTInput(InputFormatPlugin):
|
|||||||
raise ValueError('This txt file has malformed markup, it cannot be'
|
raise ValueError('This txt file has malformed markup, it cannot be'
|
||||||
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
||||||
else:
|
else:
|
||||||
|
# Determine the paragraph type of the document.
|
||||||
|
if options.paragraph_type == 'auto':
|
||||||
|
options.paragraph_type = detect_paragraph_type(txt)
|
||||||
|
if options.paragraph_type == 'unknown':
|
||||||
|
log.debug('Could not reliably determine paragraph type using block')
|
||||||
|
options.paragraph_type = 'block'
|
||||||
|
else:
|
||||||
|
log.debug('Auto detected paragraph type as %s' % options.paragraph_type)
|
||||||
|
|
||||||
|
# We don't check for block because the processor assumes block.
|
||||||
|
# single and print at transformed to block for processing.
|
||||||
|
if options.paragraph_type == 'single':
|
||||||
|
txt = separate_paragraphs_single_line(txt)
|
||||||
|
elif options.paragraph_type == 'print':
|
||||||
|
txt = separate_paragraphs_print_formatted(txt)
|
||||||
|
|
||||||
flow_size = getattr(options, 'flow_size', 0)
|
flow_size = getattr(options, 'flow_size', 0)
|
||||||
html = convert_basic(txt, epub_split_size_kb=flow_size)
|
html = convert_basic(txt, epub_split_size_kb=flow_size)
|
||||||
|
|
||||||
@ -85,11 +114,10 @@ class TXTInput(InputFormatPlugin):
|
|||||||
htmlfile = open(fname, 'wb')
|
htmlfile = open(fname, 'wb')
|
||||||
with htmlfile:
|
with htmlfile:
|
||||||
htmlfile.write(html.encode('utf-8'))
|
htmlfile.write(html.encode('utf-8'))
|
||||||
cwd = os.getcwdu()
|
|
||||||
odi = options.debug_pipeline
|
odi = options.debug_pipeline
|
||||||
options.debug_pipeline = None
|
options.debug_pipeline = None
|
||||||
oeb = html_input(open(htmlfile.name, 'rb'), options, 'html', log,
|
oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log,
|
||||||
{}, cwd)
|
{})
|
||||||
options.debug_pipeline = odi
|
options.debug_pipeline = odi
|
||||||
os.remove(htmlfile.name)
|
os.remove(htmlfile.name)
|
||||||
return oeb
|
return oeb
|
||||||
|
@ -26,7 +26,7 @@ class TXTOutput(OutputFormatPlugin):
|
|||||||
'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
|
'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
|
||||||
'For Mac OS X use \'unix\'. \'system\' will default to the newline '
|
'For Mac OS X use \'unix\'. \'system\' will default to the newline '
|
||||||
'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
|
'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
|
||||||
OptionRecommendation(name='output_encoding', recommended_value='utf-8',
|
OptionRecommendation(name='txt_output_encoding', recommended_value='utf-8',
|
||||||
level=OptionRecommendation.LOW,
|
level=OptionRecommendation.LOW,
|
||||||
help=_('Specify the character encoding of the output document. ' \
|
help=_('Specify the character encoding of the output document. ' \
|
||||||
'The default is utf-8.')),
|
'The default is utf-8.')),
|
||||||
@ -64,7 +64,7 @@ class TXTOutput(OutputFormatPlugin):
|
|||||||
writer = MarkdownMLizer(log)
|
writer = MarkdownMLizer(log)
|
||||||
else:
|
else:
|
||||||
writer = TXTMLizer(log)
|
writer = TXTMLizer(log)
|
||||||
|
|
||||||
txt = writer.extract_content(oeb_book, opts)
|
txt = writer.extract_content(oeb_book, opts)
|
||||||
|
|
||||||
log.debug('\tReplacing newlines with selected type...')
|
log.debug('\tReplacing newlines with selected type...')
|
||||||
@ -81,7 +81,7 @@ class TXTOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
out_stream.seek(0)
|
out_stream.seek(0)
|
||||||
out_stream.truncate()
|
out_stream.truncate()
|
||||||
out_stream.write(txt.encode(opts.output_encoding, 'replace'))
|
out_stream.write(txt.encode(opts.txt_output_encoding, 'replace'))
|
||||||
|
|
||||||
if close:
|
if close:
|
||||||
out_stream.close()
|
out_stream.close()
|
||||||
|
@ -49,7 +49,6 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
|
|||||||
if isbytestring(txt):
|
if isbytestring(txt):
|
||||||
txt = txt.decode('utf-8')
|
txt = txt.decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
lines = []
|
lines = []
|
||||||
# Split into paragraphs based on having a blank line between text.
|
# Split into paragraphs based on having a blank line between text.
|
||||||
for line in txt.split('\n\n'):
|
for line in txt.split('\n\n'):
|
||||||
@ -94,3 +93,54 @@ def split_string_separator(txt, size) :
|
|||||||
xrange(0, len(txt), size)])
|
xrange(0, len(txt), size)])
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
|
def detect_paragraph_type(txt):
    '''
    Tries to determine the paragraph structure of the document.

    block: Paragraphs are separated by a blank line.
    single: Each line is a paragraph.
    print: Each paragraph starts with 2+ spaces or a tab
           and ends when a new paragraph is reached.
    unknown: The text contains no non-blank lines, so no ratio
             can be computed.

    returns block, single, print or unknown
    '''
    # Normalise Windows and old Mac line endings so the multi-line
    # regular expressions below only have to deal with '\n'.
    txt = txt.replace('\r\n', '\n')
    txt = txt.replace('\r', '\n')
    txt_line_count = len(re.findall(r'(?mu)^\s*.+$', txt))

    # Every check below divides by the number of text lines. Without
    # any text there is nothing to measure.
    if not txt_line_count:
        return 'unknown'

    # Check for print
    # Only horizontal whitespace counts as an indent. A plain \s would
    # also match newlines, so runs of blank lines would be counted as
    # indented lines.
    tab_line_count = len(re.findall(r'(?mu)^(\t|[^\S\n]{2,}).+$', txt))
    if tab_line_count / float(txt_line_count) >= .25:
        return 'print'

    # Check for block
    empty_line_count = len(re.findall(r'(?mu)^\s*$', txt))
    if empty_line_count / float(txt_line_count) >= .25:
        return 'block'

    # Nothing else matched so assume single.
    return 'single'
|
||||||
|
|
||||||
|
def detect_formatting_type(txt):
    '''
    Tries to determine the formatting markup used in the document.

    markdown: At least 5 headings, 5 images or 5 links written in
              markdown syntax were found, or one markdown escape
              sequence occurs more than 10 times.
    none: No markup was detected.

    returns markdown or none
    '''
    # Normalise line endings. The '$' anchor only matches before '\n',
    # so underlined headings in '\r\n' files would never be found.
    txt = txt.replace('\r\n', '\n')
    txt = txt.replace('\r', '\n')

    # Check for markdown
    # Headings
    if len(re.findall(r'(?mu)^#+', txt)) >= 5:
        return 'markdown'
    if len(re.findall(r'(?mu)^=+$', txt)) >= 5:
        return 'markdown'
    if len(re.findall(r'(?mu)^-+$', txt)) >= 5:
        return 'markdown'
    # Images
    if len(re.findall(r'(?u)!\[.*?\]\(.+?\)', txt)) >= 5:
        return 'markdown'
    # Links
    if len(re.findall(r'(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt)) >= 5:
        return 'markdown'
    # Escaped characters
    md_escaped_characters = ('\\', '`', '*', '_', '{', '}', '[', ']',
                             '(', ')', '#', '+', '-', '.', '!')
    for c in md_escaped_characters:
        if txt.count('\\' + c) > 10:
            return 'markdown'

    return 'none'
|
||||||
|
@ -192,6 +192,11 @@ class Widget(QWidget):
|
|||||||
if not val: val = ''
|
if not val: val = ''
|
||||||
getattr(g, 'setPlainText', g.setText)(val)
|
getattr(g, 'setPlainText', g.setText)(val)
|
||||||
getattr(g, 'setCursorPosition', lambda x: x)(0)
|
getattr(g, 'setCursorPosition', lambda x: x)(0)
|
||||||
|
elif isinstance(g, EncodingComboBox):
|
||||||
|
if val:
|
||||||
|
g.setEditText(val)
|
||||||
|
else:
|
||||||
|
g.setCurrentIndex(0)
|
||||||
elif isinstance(g, QComboBox) and val:
|
elif isinstance(g, QComboBox) and val:
|
||||||
idx = g.findText(val, Qt.MatchFixedString)
|
idx = g.findText(val, Qt.MatchFixedString)
|
||||||
if idx < 0:
|
if idx < 0:
|
||||||
@ -202,8 +207,6 @@ class Widget(QWidget):
|
|||||||
g.setCheckState(Qt.Checked if bool(val) else Qt.Unchecked)
|
g.setCheckState(Qt.Checked if bool(val) else Qt.Unchecked)
|
||||||
elif isinstance(g, (XPathEdit, RegexEdit)):
|
elif isinstance(g, (XPathEdit, RegexEdit)):
|
||||||
g.edit.setText(val if val else '')
|
g.edit.setText(val if val else '')
|
||||||
elif isinstance(g, EncodingComboBox):
|
|
||||||
g.setEditText(val if val else '')
|
|
||||||
else:
|
else:
|
||||||
raise Exception('Can\'t set value %s in %s'%(repr(val),
|
raise Exception('Can\'t set value %s in %s'%(repr(val),
|
||||||
unicode(g.objectName())))
|
unicode(g.objectName())))
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from calibre.gui2.convert.pdb_input_ui import Ui_Form
|
from calibre.gui2.convert.txt_input_ui import Ui_Form
|
||||||
from calibre.gui2.convert import Widget
|
from calibre.gui2.convert import Widget
|
||||||
|
|
||||||
class PluginWidget(Widget, Ui_Form):
|
class PluginWidget(Widget, Ui_Form):
|
||||||
@ -12,10 +12,14 @@ class PluginWidget(Widget, Ui_Form):
|
|||||||
TITLE = _('PDB Input')
|
TITLE = _('PDB Input')
|
||||||
HELP = _('Options specific to')+' PDB '+_('input')
|
HELP = _('Options specific to')+' PDB '+_('input')
|
||||||
COMMIT_NAME = 'pdb_input'
|
COMMIT_NAME = 'pdb_input'
|
||||||
ICON = I('mimetypes/unknown.png')
|
ICON = I('mimetypes/txt.png')
|
||||||
|
|
||||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||||
Widget.__init__(self, parent,
|
Widget.__init__(self, parent,
|
||||||
['single_line_paras', 'print_formatted_paras'])
|
['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
|
for x in get_option('paragraph_type').option.choices:
|
||||||
|
self.opt_paragraph_type.addItem(x)
|
||||||
|
for x in get_option('formatting_type').option.choices:
|
||||||
|
self.opt_formatting_type.addItem(x)
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
@ -1,48 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<ui version="4.0">
|
|
||||||
<class>Form</class>
|
|
||||||
<widget class="QWidget" name="Form">
|
|
||||||
<property name="geometry">
|
|
||||||
<rect>
|
|
||||||
<x>0</x>
|
|
||||||
<y>0</y>
|
|
||||||
<width>400</width>
|
|
||||||
<height>300</height>
|
|
||||||
</rect>
|
|
||||||
</property>
|
|
||||||
<property name="windowTitle">
|
|
||||||
<string>Form</string>
|
|
||||||
</property>
|
|
||||||
<layout class="QGridLayout" name="gridLayout">
|
|
||||||
<item row="2" column="0">
|
|
||||||
<spacer name="verticalSpacer">
|
|
||||||
<property name="orientation">
|
|
||||||
<enum>Qt::Vertical</enum>
|
|
||||||
</property>
|
|
||||||
<property name="sizeHint" stdset="0">
|
|
||||||
<size>
|
|
||||||
<width>20</width>
|
|
||||||
<height>213</height>
|
|
||||||
</size>
|
|
||||||
</property>
|
|
||||||
</spacer>
|
|
||||||
</item>
|
|
||||||
<item row="0" column="0">
|
|
||||||
<widget class="QCheckBox" name="opt_single_line_paras">
|
|
||||||
<property name="text">
|
|
||||||
<string>Treat each &line as a paragraph</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="1" column="0">
|
|
||||||
<widget class="QCheckBox" name="opt_print_formatted_paras">
|
|
||||||
<property name="text">
|
|
||||||
<string>Assume print formatting</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
</layout>
|
|
||||||
</widget>
|
|
||||||
<resources/>
|
|
||||||
<connections/>
|
|
||||||
</ui>
|
|
@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form):
|
|||||||
ICON = I('mimetypes/unknown.png')
|
ICON = I('mimetypes/unknown.png')
|
||||||
|
|
||||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||||
Widget.__init__(self, parent, ['format', 'inline_toc', 'output_encoding'])
|
Widget.__init__(self, parent, ['format', 'inline_toc', 'pdb_output_encoding'])
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
|
||||||
|
@ -55,10 +55,21 @@
|
|||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="1" column="1">
|
<item row="1" column="1">
|
||||||
<widget class="QLineEdit" name="opt_output_encoding"/>
|
<widget class="EncodingComboBox" name="opt_pdb_output_encoding">
|
||||||
|
<property name="editable">
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
|
<customwidgets>
|
||||||
|
<customwidget>
|
||||||
|
<class>EncodingComboBox</class>
|
||||||
|
<extends>QComboBox</extends>
|
||||||
|
<header>widgets.h</header>
|
||||||
|
</customwidget>
|
||||||
|
</customwidgets>
|
||||||
<resources/>
|
<resources/>
|
||||||
<connections/>
|
<connections/>
|
||||||
</ui>
|
</ui>
|
||||||
|
@ -18,6 +18,6 @@ class PluginWidget(Widget, Ui_Form):
|
|||||||
|
|
||||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||||
Widget.__init__(self, parent, ['inline_toc', 'full_image_depth',
|
Widget.__init__(self, parent, ['inline_toc', 'full_image_depth',
|
||||||
'output_encoding'])
|
'pml_output_encoding'])
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
<string>Form</string>
|
<string>Form</string>
|
||||||
</property>
|
</property>
|
||||||
<layout class="QGridLayout" name="gridLayout">
|
<layout class="QGridLayout" name="gridLayout">
|
||||||
<item row="3" column="0">
|
<item row="4" column="0">
|
||||||
<spacer name="verticalSpacer">
|
<spacer name="verticalSpacer">
|
||||||
<property name="orientation">
|
<property name="orientation">
|
||||||
<enum>Qt::Vertical</enum>
|
<enum>Qt::Vertical</enum>
|
||||||
@ -27,32 +27,47 @@
|
|||||||
</property>
|
</property>
|
||||||
</spacer>
|
</spacer>
|
||||||
</item>
|
</item>
|
||||||
<item row="1" column="0">
|
<item row="2" column="0">
|
||||||
<widget class="QCheckBox" name="opt_inline_toc">
|
<widget class="QCheckBox" name="opt_inline_toc">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>&Inline TOC</string>
|
<string>&Inline TOC</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="2" column="0">
|
<item row="3" column="0">
|
||||||
<widget class="QCheckBox" name="opt_full_image_depth">
|
<widget class="QCheckBox" name="opt_full_image_depth">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Do not reduce image size and depth</string>
|
<string>Do not reduce image size and depth</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="0" column="0">
|
<item row="1" column="0">
|
||||||
<widget class="QLabel" name="label">
|
<layout class="QHBoxLayout" name="horizontalLayout">
|
||||||
<property name="text">
|
<item>
|
||||||
<string>Output Encoding:</string>
|
<widget class="QLabel" name="label">
|
||||||
</property>
|
<property name="text">
|
||||||
</widget>
|
<string>Output Encoding:</string>
|
||||||
</item>
|
</property>
|
||||||
<item row="0" column="1">
|
</widget>
|
||||||
<widget class="QLineEdit" name="opt_output_encoding"/>
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="EncodingComboBox" name="opt_pml_output_encoding">
|
||||||
|
<property name="editable">
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
</layout>
|
||||||
</item>
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
|
<customwidgets>
|
||||||
|
<customwidget>
|
||||||
|
<class>EncodingComboBox</class>
|
||||||
|
<extends>QComboBox</extends>
|
||||||
|
<header>widgets.h</header>
|
||||||
|
</customwidget>
|
||||||
|
</customwidgets>
|
||||||
<resources/>
|
<resources/>
|
||||||
<connections/>
|
<connections/>
|
||||||
</ui>
|
</ui>
|
||||||
|
25
src/calibre/gui2/convert/tcr_input.py
Normal file
25
src/calibre/gui2/convert/tcr_input.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre.gui2.convert.txt_input_ui import Ui_Form
|
||||||
|
from calibre.gui2.convert import Widget
|
||||||
|
|
||||||
|
class PluginWidget(Widget, Ui_Form):
    '''
    Conversion options page for TCR input.

    Exposes the paragraph type, formatting type, markdown TOC and
    preserve spaces options.
    '''

    TITLE = _('TCR Input')
    HELP = _('Options specific to') + ' TCR ' + _('input')
    COMMIT_NAME = 'tcr_input'
    ICON = I('mimetypes/txt.png')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        option_names = ['paragraph_type', 'formatting_type',
                        'markdown_disable_toc', 'preserve_spaces']
        Widget.__init__(self, parent, option_names)
        self.db = db
        self.book_id = book_id
        # Fill both combo boxes with the choices declared by their
        # options before the stored values are loaded into the widgets.
        for name in ('paragraph_type', 'formatting_type'):
            choices = get_option(name).option.choices
            for choice in choices:
                getattr(self, 'opt_' + name).addItem(choice)
        self.initialize_options(get_option, get_help, db, book_id)
|
@ -16,7 +16,10 @@ class PluginWidget(Widget, Ui_Form):
|
|||||||
|
|
||||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||||
Widget.__init__(self, parent,
|
Widget.__init__(self, parent,
|
||||||
['single_line_paras', 'print_formatted_paras', 'markdown',
|
['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
|
||||||
'markdown_disable_toc', 'preserve_spaces'])
|
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
|
for x in get_option('paragraph_type').option.choices:
|
||||||
|
self.opt_paragraph_type.addItem(x)
|
||||||
|
for x in get_option('formatting_type').option.choices:
|
||||||
|
self.opt_formatting_type.addItem(x)
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
<rect>
|
<rect>
|
||||||
<x>0</x>
|
<x>0</x>
|
||||||
<y>0</y>
|
<y>0</y>
|
||||||
<width>470</width>
|
<width>518</width>
|
||||||
<height>300</height>
|
<height>300</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
@ -15,47 +15,23 @@
|
|||||||
</property>
|
</property>
|
||||||
<layout class="QGridLayout" name="gridLayout">
|
<layout class="QGridLayout" name="gridLayout">
|
||||||
<item row="0" column="0">
|
<item row="0" column="0">
|
||||||
<widget class="QCheckBox" name="opt_single_line_paras">
|
<widget class="QLabel" name="label_2">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Treat each &line as a paragraph</string>
|
<string>Paragraph style:</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="1" column="0">
|
<item row="0" column="1">
|
||||||
<widget class="QCheckBox" name="opt_print_formatted_paras">
|
<widget class="QComboBox" name="opt_paragraph_type"/>
|
||||||
|
</item>
|
||||||
|
<item row="5" column="0" colspan="2">
|
||||||
|
<widget class="QCheckBox" name="opt_preserve_spaces">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Assume print formatting</string>
|
<string>Preserve &spaces</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="2" column="0">
|
<item row="6" column="0" colspan="2">
|
||||||
<widget class="QCheckBox" name="opt_markdown">
|
|
||||||
<property name="text">
|
|
||||||
<string>Process using markdown</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="3" column="0">
|
|
||||||
<widget class="QLabel" name="label">
|
|
||||||
<property name="text">
|
|
||||||
<string><p>Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit <a href="http://daringfireball.net/projects/markdown">markdown</a>.</string>
|
|
||||||
</property>
|
|
||||||
<property name="wordWrap">
|
|
||||||
<bool>true</bool>
|
|
||||||
</property>
|
|
||||||
<property name="openExternalLinks">
|
|
||||||
<bool>true</bool>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="4" column="0">
|
|
||||||
<widget class="QCheckBox" name="opt_markdown_disable_toc">
|
|
||||||
<property name="text">
|
|
||||||
<string>Do not insert Table of Contents into output text when using markdown</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="6" column="0">
|
|
||||||
<spacer name="verticalSpacer">
|
<spacer name="verticalSpacer">
|
||||||
<property name="orientation">
|
<property name="orientation">
|
||||||
<enum>Qt::Vertical</enum>
|
<enum>Qt::Vertical</enum>
|
||||||
@ -68,32 +44,47 @@
|
|||||||
</property>
|
</property>
|
||||||
</spacer>
|
</spacer>
|
||||||
</item>
|
</item>
|
||||||
<item row="5" column="0">
|
<item row="1" column="1">
|
||||||
<widget class="QCheckBox" name="opt_preserve_spaces">
|
<widget class="QComboBox" name="opt_formatting_type"/>
|
||||||
|
</item>
|
||||||
|
<item row="1" column="0">
|
||||||
|
<widget class="QLabel" name="label_3">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Preserve &spaces</string>
|
<string>Formatting style:</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item row="2" column="0" rowspan="2" colspan="2">
|
||||||
|
<widget class="QGroupBox" name="groupBox">
|
||||||
|
<property name="title">
|
||||||
|
<string>Markdown Options</string>
|
||||||
|
</property>
|
||||||
|
<layout class="QVBoxLayout" name="verticalLayout">
|
||||||
|
<item>
|
||||||
|
<widget class="QLabel" name="label">
|
||||||
|
<property name="text">
|
||||||
|
<string><p>Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit <a href="http://daringfireball.net/projects/markdown">markdown</a>.</string>
|
||||||
|
</property>
|
||||||
|
<property name="wordWrap">
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
<property name="openExternalLinks">
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QCheckBox" name="opt_markdown_disable_toc">
|
||||||
|
<property name="text">
|
||||||
|
<string>Do not insert Table of Contents into output text when using markdown</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
</layout>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
<resources/>
|
<resources/>
|
||||||
<connections>
|
<connections/>
|
||||||
<connection>
|
|
||||||
<sender>opt_markdown</sender>
|
|
||||||
<signal>toggled(bool)</signal>
|
|
||||||
<receiver>opt_markdown_disable_toc</receiver>
|
|
||||||
<slot>setEnabled(bool)</slot>
|
|
||||||
<hints>
|
|
||||||
<hint type="sourcelabel">
|
|
||||||
<x>76</x>
|
|
||||||
<y>80</y>
|
|
||||||
</hint>
|
|
||||||
<hint type="destinationlabel">
|
|
||||||
<x>418</x>
|
|
||||||
<y>105</y>
|
|
||||||
</hint>
|
|
||||||
</hints>
|
|
||||||
</connection>
|
|
||||||
</connections>
|
|
||||||
</ui>
|
</ui>
|
||||||
|
@ -22,7 +22,7 @@ class PluginWidget(Widget, Ui_Form):
|
|||||||
Widget.__init__(self, parent,
|
Widget.__init__(self, parent,
|
||||||
['newline', 'max_line_length', 'force_max_line_length',
|
['newline', 'max_line_length', 'force_max_line_length',
|
||||||
'inline_toc', 'markdown_format', 'keep_links', 'keep_image_references',
|
'inline_toc', 'markdown_format', 'keep_links', 'keep_image_references',
|
||||||
'output_encoding'])
|
'txt_output_encoding'])
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
|
||||||
|
@ -96,10 +96,21 @@
|
|||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="2" column="1">
|
<item row="2" column="1">
|
||||||
<widget class="QLineEdit" name="opt_output_encoding"/>
|
<widget class="EncodingComboBox" name="opt_txt_output_encoding">
|
||||||
|
<property name="editable">
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
|
<customwidgets>
|
||||||
|
<customwidget>
|
||||||
|
<class>EncodingComboBox</class>
|
||||||
|
<extends>QComboBox</extends>
|
||||||
|
<header>widgets.h</header>
|
||||||
|
</customwidget>
|
||||||
|
</customwidgets>
|
||||||
<resources/>
|
<resources/>
|
||||||
<connections/>
|
<connections/>
|
||||||
</ui>
|
</ui>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user