TCR input.

This commit is contained in:
John Schember 2009-10-12 20:12:38 -04:00
parent 30fd38cdab
commit 1b2efaaf6f
3 changed files with 79 additions and 0 deletions

View File

@ -337,6 +337,7 @@ from calibre.ebooks.pml.input import PMLInput
from calibre.ebooks.rb.input import RBInput from calibre.ebooks.rb.input import RBInput
from calibre.web.feeds.input import RecipeInput from calibre.web.feeds.input import RecipeInput
from calibre.ebooks.rtf.input import RTFInput from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.tcr.input import TCRInput
from calibre.ebooks.txt.input import TXTInput from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lrf.input import LRFInput from calibre.ebooks.lrf.input import LRFInput
@ -385,6 +386,7 @@ plugins += [
RBInput, RBInput,
RecipeInput, RecipeInput,
RTFInput, RTFInput,
TCRInput,
TXTInput, TXTInput,
LRFInput, LRFInput,
] ]

View File

@ -0,0 +1,5 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,72 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
separate_paragraphs_single_line, separate_paragraphs_print_formatted
class TCRInput(InputFormatPlugin):
name = 'TCR Input'
author = 'John Schember'
description = 'Convert TCR files to HTML'
file_types = set(['tcr'])
options = set([
OptionRecommendation(name='single_line_paras', recommended_value=False,
help=_('Normally calibre treats blank lines as paragraph markers. '
'With this option it will assume that every line represents '
'a paragraph instead.')),
OptionRecommendation(name='print_formatted_paras', recommended_value=False,
help=_('Normally calibre treats blank lines as paragraph markers. '
'With this option it will assume that every line starting with '
'an indent (either a tab or 2+ spaces) represents a paragraph. '
'Paragraphs end when the next line that starts with an indent '
'is reached.')),
])
def convert(self, stream, options, file_ext, log, accelerators):
txt = []
log.debug('Checking TCR header...')
if stream.read(9) != '!!8-Bit!!':
raise ValueError('File %s contaions an invalid TCR header.' % stream.name)
log.debug('Building string dictionary...')
# Dictionary codes that the file contents are broken down into.
entries = []
for i in xrange(256):
entry_len = ord(stream.read(1))
entries.append(stream.read(entry_len))
log.info('Decompressing text...')
# Map the values in the file to locations in the string list.
entry_loc = stream.read(1)
while entry_loc != '': # EOF
txt.append(entries[ord(entry_loc)])
entry_loc = stream.read(1)
ienc = options.input_encoding if options.input_encoding else 'utf-8'
txt = ''.join(txt).decode(ienc, 'replace')
log.info('Converting text to OEB...')
if options.single_line_paras:
txt = separate_paragraphs_single_line(txt)
if options.print_formatted_paras:
txt = separate_paragraphs_print_formatted(txt)
html = convert_basic(txt)
with open(os.path.join(os.getcwd(), 'index.html'), 'wb') as index:
index.write(html.encode('utf-8'))
from calibre.ebooks.metadata.meta import get_metadata
mi = get_metadata(stream, 'tcr')
manifest = [('index.html', None)]
spine = ['index.html']
opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
return os.path.join(os.getcwd(), 'metadata.opf')