mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Pull from driver-dev
This commit is contained in:
commit
ad78fd2a68
@ -57,7 +57,6 @@ def line_length(raw, percent):
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
total = sum(lengths)
|
total = sum(lengths)
|
||||||
print total
|
|
||||||
avg = total / len(lengths)
|
avg = total / len(lengths)
|
||||||
max_line = avg * 2
|
max_line = avg * 2
|
||||||
|
|
||||||
|
@ -9,12 +9,10 @@ Transform OEB content into FB2 markup
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre import entity_to_unicode
|
|
||||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
from calibre.ebooks.oeb.base import OEB_IMAGES
|
from calibre.ebooks.oeb.base import OEB_IMAGES
|
||||||
@ -33,11 +31,11 @@ STYLES = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
class FB2MLizer(object):
|
class FB2MLizer(object):
|
||||||
def __init__(self, ignore_tables=False):
|
def __init__(self, log):
|
||||||
self.ignore_tables = ignore_tables
|
self.log = log
|
||||||
|
|
||||||
def extract_content(self, oeb_book, opts):
|
def extract_content(self, oeb_book, opts):
|
||||||
oeb_book.logger.info('Converting XHTML to FB2 markup...')
|
self.log.info('Converting XHTML to FB2 markup...')
|
||||||
self.oeb_book = oeb_book
|
self.oeb_book = oeb_book
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
return self.fb2mlize_spine()
|
return self.fb2mlize_spine()
|
||||||
@ -45,12 +43,14 @@ class FB2MLizer(object):
|
|||||||
def fb2mlize_spine(self):
|
def fb2mlize_spine(self):
|
||||||
output = self.fb2_header()
|
output = self.fb2_header()
|
||||||
if 'titlepage' in self.oeb_book.guide:
|
if 'titlepage' in self.oeb_book.guide:
|
||||||
|
self.log.debug('Generating cover page...')
|
||||||
href = self.oeb_book.guide['titlepage'].href
|
href = self.oeb_book.guide['titlepage'].href
|
||||||
item = self.oeb_book.manifest.hrefs[href]
|
item = self.oeb_book.manifest.hrefs[href]
|
||||||
if item.spine_position is None:
|
if item.spine_position is None:
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
|
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||||
output += self.fb2_body_footer()
|
output += self.fb2_body_footer()
|
||||||
|
@ -16,7 +16,7 @@ class FB2Output(OutputFormatPlugin):
|
|||||||
file_type = 'fb2'
|
file_type = 'fb2'
|
||||||
|
|
||||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
fb2mlizer = FB2MLizer(ignore_tables=opts.linearize_tables)
|
fb2mlizer = FB2MLizer(log)
|
||||||
fb2_content = fb2mlizer.extract_content(oeb_book, opts)
|
fb2_content = fb2mlizer.extract_content(oeb_book, opts)
|
||||||
|
|
||||||
close = False
|
close = False
|
||||||
@ -30,7 +30,7 @@ class FB2Output(OutputFormatPlugin):
|
|||||||
|
|
||||||
out_stream.seek(0)
|
out_stream.seek(0)
|
||||||
out_stream.truncate()
|
out_stream.truncate()
|
||||||
out_stream.write(fb2_content.encode('utf-8'))
|
out_stream.write(fb2_content.encode('utf-8', 'replace'))
|
||||||
|
|
||||||
if close:
|
if close:
|
||||||
out_stream.close()
|
out_stream.close()
|
||||||
|
@ -8,7 +8,6 @@ import os
|
|||||||
|
|
||||||
from calibre.customize.conversion import OutputFormatPlugin
|
from calibre.customize.conversion import OutputFormatPlugin
|
||||||
from calibre.ebooks.pdb.ereader.writer import Writer
|
from calibre.ebooks.pdb.ereader.writer import Writer
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
|
||||||
|
|
||||||
class EREADEROutput(OutputFormatPlugin):
|
class EREADEROutput(OutputFormatPlugin):
|
||||||
|
|
||||||
|
@ -73,9 +73,9 @@ class Reader132(FormatReader):
|
|||||||
|
|
||||||
def decompress_text(self, number):
|
def decompress_text(self, number):
|
||||||
if self.header_record.version == 2:
|
if self.header_record.version == 2:
|
||||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
if self.header_record.version == 10:
|
if self.header_record.version == 10:
|
||||||
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
|
|
||||||
def get_image(self, number):
|
def get_image(self, number):
|
||||||
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
|
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
|
||||||
|
@ -54,7 +54,7 @@ class Reader202(FormatReader):
|
|||||||
return self.sections[number]
|
return self.sections[number]
|
||||||
|
|
||||||
def decompress_text(self, number):
|
def decompress_text(self, number):
|
||||||
return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding)
|
return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
|
|
||||||
def get_image(self, number):
|
def get_image(self, number):
|
||||||
name = None
|
name = None
|
||||||
|
@ -49,7 +49,7 @@ class Reader(FormatReader):
|
|||||||
if self.header_record.compression == 1:
|
if self.header_record.compression == 1:
|
||||||
return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding)
|
return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding)
|
||||||
if self.header_record.compression == 2:
|
if self.header_record.compression == 2:
|
||||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def extract_content(self, output_dir):
|
def extract_content(self, output_dir):
|
||||||
|
@ -65,7 +65,7 @@ class Reader(FormatReader):
|
|||||||
def decompress_text(self, number):
|
def decompress_text(self, number):
|
||||||
if number == 1:
|
if number == 1:
|
||||||
self.uncompressor = zlib.decompressobj()
|
self.uncompressor = zlib.decompressobj()
|
||||||
return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
|
|
||||||
def extract_content(self, output_dir):
|
def extract_content(self, output_dir):
|
||||||
txt = ''
|
txt = ''
|
||||||
|
@ -24,10 +24,12 @@ class PDFInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
|
log.debug('Converting file to html...')
|
||||||
# The main html file will be named index.html
|
# The main html file will be named index.html
|
||||||
pdftohtml(os.getcwd(), stream.name, options.no_images)
|
pdftohtml(os.getcwd(), stream.name, options.no_images)
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
|
log.debug('Retrieving document metadata...')
|
||||||
mi = get_metadata(stream, 'pdf')
|
mi = get_metadata(stream, 'pdf')
|
||||||
opf = OPFCreator(os.getcwd(), mi)
|
opf = OPFCreator(os.getcwd(), mi)
|
||||||
|
|
||||||
@ -42,9 +44,11 @@ class PDFInput(InputFormatPlugin):
|
|||||||
new_i = i.replace('-', '')
|
new_i = i.replace('-', '')
|
||||||
os.rename(i, new_i)
|
os.rename(i, new_i)
|
||||||
manifest.append((new_i, None))
|
manifest.append((new_i, None))
|
||||||
|
log.debug('Generating manifest...')
|
||||||
opf.create_manifest(manifest)
|
opf.create_manifest(manifest)
|
||||||
|
|
||||||
opf.create_spine(['index.html'])
|
opf.create_spine(['index.html'])
|
||||||
|
log.debug('Rendering manifest...')
|
||||||
with open('metadata.opf', 'wb') as opffile:
|
with open('metadata.opf', 'wb') as opffile:
|
||||||
opf.render(opffile)
|
opf.render(opffile)
|
||||||
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
@ -9,9 +8,8 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Convert OEB ebook format to PDF.
|
Convert OEB ebook format to PDF.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
#unit, papersize, orientation, custom_size, profile
|
import glob
|
||||||
|
import os
|
||||||
import os, glob
|
|
||||||
|
|
||||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||||
OptionRecommendation
|
OptionRecommendation
|
||||||
@ -54,14 +52,17 @@ class PDFOutput(OutputFormatPlugin):
|
|||||||
self.metadata = oeb_book.metadata
|
self.metadata = oeb_book.metadata
|
||||||
|
|
||||||
if input_plugin.is_image_collection:
|
if input_plugin.is_image_collection:
|
||||||
|
log.debug('Converting input as an image collection...')
|
||||||
self.convert_images(input_plugin.get_images())
|
self.convert_images(input_plugin.get_images())
|
||||||
else:
|
else:
|
||||||
|
log.debug('Converting input as a text based book...')
|
||||||
self.convert_text(oeb_book)
|
self.convert_text(oeb_book)
|
||||||
|
|
||||||
def convert_images(self, images):
|
def convert_images(self, images):
|
||||||
self.write(ImagePDFWriter, images)
|
self.write(ImagePDFWriter, images)
|
||||||
|
|
||||||
def convert_text(self, oeb_book):
|
def convert_text(self, oeb_book):
|
||||||
|
self.log.debug('Serializing oeb input to disk for processing...')
|
||||||
with TemporaryDirectory('_pdf_out') as oeb_dir:
|
with TemporaryDirectory('_pdf_out') as oeb_dir:
|
||||||
from calibre.customize.ui import plugin_for_output_format
|
from calibre.customize.ui import plugin_for_output_format
|
||||||
oeb_output = plugin_for_output_format('oeb')
|
oeb_output = plugin_for_output_format('oeb')
|
||||||
@ -86,6 +87,7 @@ class PDFOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
out_stream.seek(0)
|
out_stream.seek(0)
|
||||||
out_stream.truncate()
|
out_stream.truncate()
|
||||||
|
self.log.debug('Rendering pages to PDF...')
|
||||||
writer.dump(items, out_stream, PDFMetadata(self.metadata))
|
writer.dump(items, out_stream, PDFMetadata(self.metadata))
|
||||||
|
|
||||||
if close:
|
if close:
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
@ -9,11 +8,12 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Write content to PDF.
|
Write content to PDF.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os, shutil
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
from calibre.ebooks.pdf.pageoptions import unit, paper_size, \
|
from calibre.ebooks.pdf.pageoptions import unit, paper_size, \
|
||||||
orientation, size
|
orientation
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
|
|
||||||
from PyQt4 import QtCore
|
from PyQt4 import QtCore
|
||||||
|
@ -4,7 +4,9 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import glob, os, shutil
|
import glob
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
@ -40,8 +42,9 @@ class PMLInput(InputFormatPlugin):
|
|||||||
if self.options.input_encoding:
|
if self.options.input_encoding:
|
||||||
ienc = self.options.input_encoding
|
ienc = self.options.input_encoding
|
||||||
|
|
||||||
|
self.log.debug('Converting PML to HTML...')
|
||||||
html = pml_to_html(pml_stream.read().decode(ienc))
|
html = pml_to_html(pml_stream.read().decode(ienc))
|
||||||
html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8') + '</body></html>')
|
html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8', 'replace') + '</body></html>')
|
||||||
|
|
||||||
if pclose:
|
if pclose:
|
||||||
pml_stream.close()
|
pml_stream.close()
|
||||||
@ -51,9 +54,11 @@ class PMLInput(InputFormatPlugin):
|
|||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
self.options = options
|
self.options = options
|
||||||
|
self.log = log
|
||||||
pages, images = [], []
|
pages, images = [], []
|
||||||
|
|
||||||
if file_ext == 'pmlz':
|
if file_ext == 'pmlz':
|
||||||
|
log.debug('De-compressing content to temporary directory...')
|
||||||
with TemporaryDirectory('_unpmlz') as tdir:
|
with TemporaryDirectory('_unpmlz') as tdir:
|
||||||
zf = ZipFile(stream)
|
zf = ZipFile(stream)
|
||||||
zf.extractall(tdir)
|
zf.extractall(tdir)
|
||||||
@ -64,6 +69,7 @@ class PMLInput(InputFormatPlugin):
|
|||||||
html_path = os.path.join(os.getcwd(), html_name)
|
html_path = os.path.join(os.getcwd(), html_name)
|
||||||
|
|
||||||
pages.append(html_name)
|
pages.append(html_name)
|
||||||
|
log.debug('Processing PML item %s...' % pml)
|
||||||
self.process_pml(pml, html_path)
|
self.process_pml(pml, html_path)
|
||||||
|
|
||||||
imgs = glob.glob(os.path.join(tdir, '*.png'))
|
imgs = glob.glob(os.path.join(tdir, '*.png'))
|
||||||
@ -90,12 +96,13 @@ class PMLInput(InputFormatPlugin):
|
|||||||
manifest_items.append((item, None))
|
manifest_items.append((item, None))
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
|
log.debug('Reading metadata from input file...')
|
||||||
mi = get_metadata(stream, 'pml')
|
mi = get_metadata(stream, 'pml')
|
||||||
opf = OPFCreator(os.getcwd(), mi)
|
opf = OPFCreator(os.getcwd(), mi)
|
||||||
|
log.debug('Generating manifest...')
|
||||||
opf.create_manifest(manifest_items)
|
opf.create_manifest(manifest_items)
|
||||||
opf.create_spine(pages)
|
opf.create_spine(pages)
|
||||||
with open('metadata.opf', 'wb') as opffile:
|
with open('metadata.opf', 'wb') as opffile:
|
||||||
opf.render(opffile)
|
opf.render(opffile)
|
||||||
|
|
||||||
return os.path.join(os.getcwd(), 'metadata.opf')
|
return os.path.join(os.getcwd(), 'metadata.opf')
|
||||||
|
|
||||||
|
@ -37,13 +37,14 @@ class PMLOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
with TemporaryDirectory('_pmlz_output') as tdir:
|
with TemporaryDirectory('_pmlz_output') as tdir:
|
||||||
pmlmlizer = PMLMLizer(ignore_tables=opts.linearize_tables)
|
pmlmlizer = PMLMLizer(log)
|
||||||
content = pmlmlizer.extract_content(oeb_book, opts)
|
content = pmlmlizer.extract_content(oeb_book, opts)
|
||||||
with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
|
with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
|
||||||
out.write(content.encode(opts.output_encoding, 'replace'))
|
out.write(content.encode(opts.output_encoding, 'replace'))
|
||||||
|
|
||||||
self.write_images(oeb_book.manifest, tdir)
|
self.write_images(oeb_book.manifest, tdir)
|
||||||
|
|
||||||
|
log.debug('Compressing output...')
|
||||||
pmlz = ZipFile(output_path, 'w')
|
pmlz = ZipFile(output_path, 'w')
|
||||||
pmlz.add_dir(tdir)
|
pmlz.add_dir(tdir)
|
||||||
|
|
||||||
|
@ -10,8 +10,6 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from htmlentitydefs import codepoint2name
|
|
||||||
|
|
||||||
from calibre.ebooks.pdb.ereader import image_name
|
from calibre.ebooks.pdb.ereader import image_name
|
||||||
|
|
||||||
PML_HTML_RULES = [
|
PML_HTML_RULES = [
|
||||||
|
@ -67,11 +67,11 @@ SEPARATE_TAGS = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
class PMLMLizer(object):
|
class PMLMLizer(object):
|
||||||
def __init__(self, ignore_tables=False):
|
def __init__(self, log):
|
||||||
self.ignore_tables = ignore_tables
|
self.log = log
|
||||||
|
|
||||||
def extract_content(self, oeb_book, opts):
|
def extract_content(self, oeb_book, opts):
|
||||||
oeb_book.logger.info('Converting XHTML to PML markup...')
|
self.log.info('Converting XHTML to PML markup...')
|
||||||
self.oeb_book = oeb_book
|
self.oeb_book = oeb_book
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
return self.pmlmlize_spine()
|
return self.pmlmlize_spine()
|
||||||
@ -79,12 +79,14 @@ class PMLMLizer(object):
|
|||||||
def pmlmlize_spine(self):
|
def pmlmlize_spine(self):
|
||||||
output = u''
|
output = u''
|
||||||
if 'titlepage' in self.oeb_book.guide:
|
if 'titlepage' in self.oeb_book.guide:
|
||||||
|
self.log.debug('Generating title page...')
|
||||||
href = self.oeb_book.guide['titlepage'].href
|
href = self.oeb_book.guide['titlepage'].href
|
||||||
item = self.oeb_book.manifest.hrefs[href]
|
item = self.oeb_book.manifest.hrefs[href]
|
||||||
if item.spine_position is None:
|
if item.spine_position is None:
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
|
self.log.debug('Converting %s to PML markup...' % item.href)
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||||
output += self.add_page_anchor(item.href)
|
output += self.add_page_anchor(item.href)
|
||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||||
|
@ -52,12 +52,12 @@ STYLES = [
|
|||||||
|
|
||||||
class RBMLizer(object):
|
class RBMLizer(object):
|
||||||
|
|
||||||
def __init__(self, name_map={}, ignore_tables=False):
|
def __init__(self, log, name_map={}):
|
||||||
|
self.log = log
|
||||||
self.name_map = name_map
|
self.name_map = name_map
|
||||||
self.ignore_tables = ignore_tables
|
|
||||||
|
|
||||||
def extract_content(self, oeb_book, opts):
|
def extract_content(self, oeb_book, opts):
|
||||||
oeb_book.logger.info('Converting XHTML to RB markup...')
|
self.log.info('Converting XHTML to RB markup...')
|
||||||
self.oeb_book = oeb_book
|
self.oeb_book = oeb_book
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
return self.mlize_spine()
|
return self.mlize_spine()
|
||||||
@ -66,12 +66,14 @@ class RBMLizer(object):
|
|||||||
def mlize_spine(self):
|
def mlize_spine(self):
|
||||||
output = u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>'
|
output = u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>'
|
||||||
if 'titlepage' in self.oeb_book.guide:
|
if 'titlepage' in self.oeb_book.guide:
|
||||||
|
self.log.debug('Generating cover page...')
|
||||||
href = self.oeb_book.guide['titlepage'].href
|
href = self.oeb_book.guide['titlepage'].href
|
||||||
item = self.oeb_book.manifest.hrefs[href]
|
item = self.oeb_book.manifest.hrefs[href]
|
||||||
if item.spine_position is None:
|
if item.spine_position is None:
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
|
self.log.debug('Converting %s to RocketBook HTML...' % item.href)
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||||
output += self.add_page_anchor(item.href)
|
output += self.add_page_anchor(item.href)
|
||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||||
|
@ -84,9 +84,9 @@ class Reader(object):
|
|||||||
|
|
||||||
for size in chunck_sizes:
|
for size in chunck_sizes:
|
||||||
cm_chunck = self.stream.read(size)
|
cm_chunck = self.stream.read(size)
|
||||||
output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding)
|
output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
else:
|
else:
|
||||||
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding)
|
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
|
|
||||||
with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
|
with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
|
||||||
html.write(output.encode('utf-8'))
|
html.write(output.encode('utf-8'))
|
||||||
@ -102,14 +102,17 @@ class Reader(object):
|
|||||||
img.write(data)
|
img.write(data)
|
||||||
|
|
||||||
def extract_content(self, output_dir):
|
def extract_content(self, output_dir):
|
||||||
|
self.log.debug('Extracting content from file...')
|
||||||
html = []
|
html = []
|
||||||
images = []
|
images = []
|
||||||
|
|
||||||
for item in self.toc:
|
for item in self.toc:
|
||||||
if item.name.lower().endswith('html'):
|
if item.name.lower().endswith('html'):
|
||||||
|
self.log.debug('HTML item %s found...' % item.name)
|
||||||
html.append(item.name)
|
html.append(item.name)
|
||||||
self.get_text(item, output_dir)
|
self.get_text(item, output_dir)
|
||||||
if item.name.lower().endswith('png'):
|
if item.name.lower().endswith('png'):
|
||||||
|
self.log.debug('PNG item %s found...' % item.name)
|
||||||
images.append(item.name)
|
images.append(item.name)
|
||||||
self.get_image(item, output_dir)
|
self.get_image(item, output_dir)
|
||||||
|
|
||||||
|
@ -64,6 +64,7 @@ class RBWriter(object):
|
|||||||
flags = 0
|
flags = 0
|
||||||
toc_items.append(TocItem(name.ljust(32, '\x00')[:32], size, flags))
|
toc_items.append(TocItem(name.ljust(32, '\x00')[:32], size, flags))
|
||||||
|
|
||||||
|
self.log.debug('Writing file header...')
|
||||||
out_stream.write(HEADER)
|
out_stream.write(HEADER)
|
||||||
out_stream.write(struct.pack('<I', 0))
|
out_stream.write(struct.pack('<I', 0))
|
||||||
out_stream.write(struct.pack('<IH', 0, 0))
|
out_stream.write(struct.pack('<IH', 0, 0))
|
||||||
@ -82,6 +83,7 @@ class RBWriter(object):
|
|||||||
|
|
||||||
out_stream.write(info[0][1])
|
out_stream.write(info[0][1])
|
||||||
|
|
||||||
|
self.log.debug('Writing compressed RB HTHML...')
|
||||||
# Compressed text with proper heading
|
# Compressed text with proper heading
|
||||||
out_stream.write(struct.pack('<I', len(text[0][1])))
|
out_stream.write(struct.pack('<I', len(text[0][1])))
|
||||||
out_stream.write(struct.pack('<I', text_size))
|
out_stream.write(struct.pack('<I', text_size))
|
||||||
@ -90,6 +92,7 @@ class RBWriter(object):
|
|||||||
for chunck in text[0][1]:
|
for chunck in text[0][1]:
|
||||||
out_stream.write(chunck)
|
out_stream.write(chunck)
|
||||||
|
|
||||||
|
self.log.debug('Writing images...')
|
||||||
for item in hidx+images:
|
for item in hidx+images:
|
||||||
out_stream.write(item[1])
|
out_stream.write(item[1])
|
||||||
|
|
||||||
@ -98,7 +101,7 @@ class RBWriter(object):
|
|||||||
out_stream.write(struct.pack('<I', total_size))
|
out_stream.write(struct.pack('<I', total_size))
|
||||||
|
|
||||||
def _text(self, oeb_book):
|
def _text(self, oeb_book):
|
||||||
rbmlizer = RBMLizer(name_map=self.name_map, ignore_tables=self.opts.linearize_tables)
|
rbmlizer = RBMLizer(log, name_map=self.name_map)
|
||||||
text = rbmlizer.extract_content(oeb_book, self.opts).encode('cp1252', 'xmlcharrefreplace')
|
text = rbmlizer.extract_content(oeb_book, self.opts).encode('cp1252', 'xmlcharrefreplace')
|
||||||
size = len(text)
|
size = len(text)
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ class RTFOutput(OutputFormatPlugin):
|
|||||||
file_type = 'rtf'
|
file_type = 'rtf'
|
||||||
|
|
||||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
rtfmlitzer = RTFMLizer(ignore_tables=opts.linearize_tables)
|
rtfmlitzer = RTFMLizer(log)
|
||||||
content = rtfmlitzer.extract_content(oeb_book, opts)
|
content = rtfmlitzer.extract_content(oeb_book, opts)
|
||||||
|
|
||||||
close = False
|
close = False
|
||||||
|
@ -79,11 +79,11 @@ TODO:
|
|||||||
'''
|
'''
|
||||||
class RTFMLizer(object):
|
class RTFMLizer(object):
|
||||||
|
|
||||||
def __init__(self, ignore_tables=False):
|
def __init__(self, log):
|
||||||
self.ignore_tables = ignore_tables
|
self.log = log
|
||||||
|
|
||||||
def extract_content(self, oeb_book, opts):
|
def extract_content(self, oeb_book, opts):
|
||||||
oeb_book.logger.info('Converting XHTML to RTF markup...')
|
self.log.info('Converting XHTML to RTF markup...')
|
||||||
self.oeb_book = oeb_book
|
self.oeb_book = oeb_book
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
return self.mlize_spine()
|
return self.mlize_spine()
|
||||||
@ -98,6 +98,7 @@ class RTFMLizer(object):
|
|||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||||
output += '{\\page } '
|
output += '{\\page } '
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
|
self.log.debug('Converting %s to RTF markup...' % item.href)
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||||
output += self.footer()
|
output += self.footer()
|
||||||
|
@ -21,20 +21,26 @@ class TXTInput(InputFormatPlugin):
|
|||||||
ienc = stream.encoding if stream.encoding else 'utf-8'
|
ienc = stream.encoding if stream.encoding else 'utf-8'
|
||||||
if options.input_encoding:
|
if options.input_encoding:
|
||||||
ienc = options.input_encoding
|
ienc = options.input_encoding
|
||||||
txt = stream.read().decode(ienc)
|
log.debug('Reading text from file...')
|
||||||
|
txt = stream.read().decode(ienc, 'replace')
|
||||||
|
|
||||||
|
log.debug('Running text though markdown conversion...')
|
||||||
try:
|
try:
|
||||||
html = txt_to_markdown(txt)
|
html = txt_to_markdown(txt)
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
raise ValueError('This txt file has malformed markup, it cannot be'
|
raise ValueError('This txt file has malformed markup, it cannot be'
|
||||||
'converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
'converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
||||||
|
|
||||||
|
log.debug('Writing html output...')
|
||||||
with open('index.html', 'wb') as index:
|
with open('index.html', 'wb') as index:
|
||||||
index.write(html.encode('utf-8'))
|
index.write(html.encode('utf-8'))
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
|
log.debug('Retrieving source document metadata...')
|
||||||
mi = get_metadata(stream, 'txt')
|
mi = get_metadata(stream, 'txt')
|
||||||
manifest = [('index.html', None)]
|
manifest = [('index.html', None)]
|
||||||
spine = ['index.html']
|
spine = ['index.html']
|
||||||
|
log.debug('Generating manifest...')
|
||||||
opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
|
opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
|
||||||
|
|
||||||
return os.path.join(os.getcwd(), 'metadata.opf')
|
return os.path.join(os.getcwd(), 'metadata.opf')
|
||||||
|
@ -24,6 +24,7 @@ class TxtWriter(object):
|
|||||||
def dump(self, spine):
|
def dump(self, spine):
|
||||||
out = u''
|
out = u''
|
||||||
for item in spine:
|
for item in spine:
|
||||||
|
self.log.debug('Processing %s...' % item.href)
|
||||||
content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
|
content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
|
||||||
content = self.remove_newlines(content)
|
content = self.remove_newlines(content)
|
||||||
content = self.strip_html(content)
|
content = self.strip_html(content)
|
||||||
@ -40,6 +41,7 @@ class TxtWriter(object):
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
def strip_html(self, text):
|
def strip_html(self, text):
|
||||||
|
self.log.debug('\tStripping html...')
|
||||||
stripped = u''
|
stripped = u''
|
||||||
|
|
||||||
# Remove unnecessary tags
|
# Remove unnecessary tags
|
||||||
@ -77,6 +79,7 @@ class TxtWriter(object):
|
|||||||
return stripped
|
return stripped
|
||||||
|
|
||||||
def replace_html_symbols(self, content):
|
def replace_html_symbols(self, content):
|
||||||
|
self.log.debug('\tReplacing entities with unicode...')
|
||||||
for entity in set(re.findall('&.+?;', content)):
|
for entity in set(re.findall('&.+?;', content)):
|
||||||
mo = re.search('(%s)' % entity[1:-1], content)
|
mo = re.search('(%s)' % entity[1:-1], content)
|
||||||
content = content.replace(entity, entity_to_unicode(mo))
|
content = content.replace(entity, entity_to_unicode(mo))
|
||||||
@ -84,6 +87,7 @@ class TxtWriter(object):
|
|||||||
return content
|
return content
|
||||||
|
|
||||||
def cleanup_text(self, text):
|
def cleanup_text(self, text):
|
||||||
|
self.log.debug('\tClean up text...')
|
||||||
# Replace bad characters.
|
# Replace bad characters.
|
||||||
text = text.replace(u'\xc2', '')
|
text = text.replace(u'\xc2', '')
|
||||||
text = text.replace(u'\xa0', ' ')
|
text = text.replace(u'\xa0', ' ')
|
||||||
@ -114,6 +118,7 @@ class TxtWriter(object):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def remove_newlines(self, text):
|
def remove_newlines(self, text):
|
||||||
|
self.log.debug('\tRemove newlines for processing...')
|
||||||
text = text.replace('\r\n', ' ')
|
text = text.replace('\r\n', ' ')
|
||||||
text = text.replace('\n', ' ')
|
text = text.replace('\n', ' ')
|
||||||
text = text.replace('\r', ' ')
|
text = text.replace('\r', ' ')
|
||||||
@ -121,6 +126,7 @@ class TxtWriter(object):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def specified_newlines(self, text):
|
def specified_newlines(self, text):
|
||||||
|
self.log.debug('\tReplacing newlines with selected type...')
|
||||||
if self.newline == '\n':
|
if self.newline == '\n':
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
@ -167,7 +167,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
self.connect(self.quit_action, SIGNAL('triggered(bool)'), self.quit)
|
self.connect(self.quit_action, SIGNAL('triggered(bool)'), self.quit)
|
||||||
self.connect(self.donate_action, SIGNAL('triggered(bool)'), self.donate)
|
self.connect(self.donate_action, SIGNAL('triggered(bool)'), self.donate)
|
||||||
self.connect(self.restore_action, SIGNAL('triggered()'),
|
self.connect(self.restore_action, SIGNAL('triggered()'),
|
||||||
self.show)
|
self.show_windows)
|
||||||
self.connect(self.action_show_book_details,
|
self.connect(self.action_show_book_details,
|
||||||
SIGNAL('triggered(bool)'), self.show_book_info)
|
SIGNAL('triggered(bool)'), self.show_book_info)
|
||||||
self.connect(self.action_restart, SIGNAL('triggered()'),
|
self.connect(self.action_restart, SIGNAL('triggered()'),
|
||||||
@ -317,6 +317,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
pm = QMenu()
|
pm = QMenu()
|
||||||
ap = self.action_preferences
|
ap = self.action_preferences
|
||||||
pm.addAction(ap.icon(), ap.text())
|
pm.addAction(ap.icon(), ap.text())
|
||||||
|
pm.addAction(self.preferences_action)
|
||||||
pm.addAction(_('Run welcome wizard'))
|
pm.addAction(_('Run welcome wizard'))
|
||||||
self.connect(pm.actions()[1], SIGNAL('triggered(bool)'),
|
self.connect(pm.actions()[1], SIGNAL('triggered(bool)'),
|
||||||
self.run_wizard)
|
self.run_wizard)
|
||||||
@ -402,9 +403,9 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
self.card_a_view.connect_dirtied_signal(self.upload_booklists)
|
self.card_a_view.connect_dirtied_signal(self.upload_booklists)
|
||||||
self.card_b_view.connect_dirtied_signal(self.upload_booklists)
|
self.card_b_view.connect_dirtied_signal(self.upload_booklists)
|
||||||
|
|
||||||
self.show()
|
self.show_windows()
|
||||||
if self.system_tray_icon.isVisible() and opts.start_in_tray:
|
if self.system_tray_icon.isVisible() and opts.start_in_tray:
|
||||||
self.hide()
|
self.hide_windows()
|
||||||
self.stack.setCurrentIndex(0)
|
self.stack.setCurrentIndex(0)
|
||||||
try:
|
try:
|
||||||
db = LibraryDatabase2(self.library_path)
|
db = LibraryDatabase2(self.library_path)
|
||||||
@ -521,16 +522,22 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
def system_tray_icon_activated(self, r):
|
def system_tray_icon_activated(self, r):
|
||||||
if r == QSystemTrayIcon.Trigger:
|
if r == QSystemTrayIcon.Trigger:
|
||||||
if self.isVisible():
|
if self.isVisible():
|
||||||
for window in QApplication.topLevelWidgets():
|
self.hide_windows()
|
||||||
if isinstance(window, (MainWindow, QDialog)) and \
|
|
||||||
window.isVisible():
|
|
||||||
window.hide()
|
|
||||||
setattr(window, '__systray_minimized', True)
|
|
||||||
else:
|
else:
|
||||||
for window in QApplication.topLevelWidgets():
|
self.show_windows()
|
||||||
if getattr(window, '__systray_minimized', False):
|
|
||||||
window.show()
|
def hide_windows(self):
|
||||||
setattr(window, '__systray_minimized', False)
|
for window in QApplication.topLevelWidgets():
|
||||||
|
if isinstance(window, (MainWindow, QDialog)) and \
|
||||||
|
window.isVisible():
|
||||||
|
window.hide()
|
||||||
|
setattr(window, '__systray_minimized', True)
|
||||||
|
|
||||||
|
def show_windows(self):
|
||||||
|
for window in QApplication.topLevelWidgets():
|
||||||
|
if getattr(window, '__systray_minimized', False):
|
||||||
|
window.show()
|
||||||
|
setattr(window, '__systray_minimized', False)
|
||||||
|
|
||||||
def test_server(self, *args):
|
def test_server(self, *args):
|
||||||
if self.content_server.exception is not None:
|
if self.content_server.exception is not None:
|
||||||
@ -641,7 +648,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
self.add_filesystem_book(path)
|
self.add_filesystem_book(path)
|
||||||
self.setWindowState(self.windowState() & \
|
self.setWindowState(self.windowState() & \
|
||||||
~Qt.WindowMinimized|Qt.WindowActive)
|
~Qt.WindowMinimized|Qt.WindowActive)
|
||||||
self.show()
|
self.show_windows()
|
||||||
self.raise_()
|
self.raise_()
|
||||||
self.activateWindow()
|
self.activateWindow()
|
||||||
elif msg.startswith('refreshdb:'):
|
elif msg.startswith('refreshdb:'):
|
||||||
@ -1658,7 +1665,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
self.spare_servers.pop().close()
|
self.spare_servers.pop().close()
|
||||||
self.device_manager.keep_going = False
|
self.device_manager.keep_going = False
|
||||||
self.cover_cache.stop()
|
self.cover_cache.stop()
|
||||||
self.hide()
|
self.hide_windows()
|
||||||
self.cover_cache.terminate()
|
self.cover_cache.terminate()
|
||||||
self.emailer.stop()
|
self.emailer.stop()
|
||||||
try:
|
try:
|
||||||
@ -1670,7 +1677,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
pass
|
pass
|
||||||
self.hide()
|
self.hide_windows()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def run_wizard(self, *args):
|
def run_wizard(self, *args):
|
||||||
@ -1694,7 +1701,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
'choose <b>Quit</b> in the context menu of the '
|
'choose <b>Quit</b> in the context menu of the '
|
||||||
'system tray.')).exec_()
|
'system tray.')).exec_()
|
||||||
dynamic['systray_msg'] = True
|
dynamic['systray_msg'] = True
|
||||||
self.hide()
|
self.hide_windows()
|
||||||
e.ignore()
|
e.ignore()
|
||||||
else:
|
else:
|
||||||
if self.confirm_quit():
|
if self.confirm_quit():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user