diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 268dad4328..9cd3271fad 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -501,27 +501,27 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
# }}}
-from calibre.ebooks.comic.input import ComicInput
-from calibre.ebooks.djvu.input import DJVUInput
-from calibre.ebooks.epub.input import EPUBInput
-from calibre.ebooks.fb2.input import FB2Input
-from calibre.ebooks.html.input import HTMLInput
-from calibre.ebooks.htmlz.input import HTMLZInput
-from calibre.ebooks.lit.input import LITInput
-from calibre.ebooks.mobi.input import MOBIInput
-from calibre.ebooks.odt.input import ODTInput
-from calibre.ebooks.pdb.input import PDBInput
-from calibre.ebooks.azw4.input import AZW4Input
-from calibre.ebooks.pdf.input import PDFInput
-from calibre.ebooks.pml.input import PMLInput
-from calibre.ebooks.rb.input import RBInput
-from calibre.web.feeds.input import RecipeInput
-from calibre.ebooks.rtf.input import RTFInput
-from calibre.ebooks.tcr.input import TCRInput
-from calibre.ebooks.txt.input import TXTInput
-from calibre.ebooks.lrf.input import LRFInput
-from calibre.ebooks.chm.input import CHMInput
-from calibre.ebooks.snb.input import SNBInput
+from calibre.ebooks.conversion.plugins.comic_input import ComicInput
+from calibre.ebooks.conversion.plugins.djvu_input import DJVUInput
+from calibre.ebooks.conversion.plugins.epub_input import EPUBInput
+from calibre.ebooks.conversion.plugins.fb2_input import FB2Input
+from calibre.ebooks.conversion.plugins.html_input import HTMLInput
+from calibre.ebooks.conversion.plugins.htmlz_input import HTMLZInput
+from calibre.ebooks.conversion.plugins.lit_input import LITInput
+from calibre.ebooks.conversion.plugins.mobi_input import MOBIInput
+from calibre.ebooks.conversion.plugins.odt_input import ODTInput
+from calibre.ebooks.conversion.plugins.pdb_input import PDBInput
+from calibre.ebooks.conversion.plugins.azw4_input import AZW4Input
+from calibre.ebooks.conversion.plugins.pdf_input import PDFInput
+from calibre.ebooks.conversion.plugins.pml_input import PMLInput
+from calibre.ebooks.conversion.plugins.rb_input import RBInput
+from calibre.ebooks.conversion.plugins.recipe_input import RecipeInput
+from calibre.ebooks.conversion.plugins.rtf_input import RTFInput
+from calibre.ebooks.conversion.plugins.tcr_input import TCRInput
+from calibre.ebooks.conversion.plugins.txt_input import TXTInput
+from calibre.ebooks.conversion.plugins.lrf_input import LRFInput
+from calibre.ebooks.conversion.plugins.chm_input import CHMInput
+from calibre.ebooks.conversion.plugins.snb_input import SNBInput
from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.fb2.output import FB2Output
diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py
index 9fcfc559aa..221bece092 100755
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@@ -7,11 +7,10 @@ __docformat__ = 'restructuredtext en'
Based on ideas from comiclrf created by FangornUK.
'''
-import os, shutil, traceback, textwrap, time, codecs
+import os, traceback, time
from Queue import Empty
-from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from calibre import extract, CurrentDir, prints, walk
+from calibre import extract, prints, walk
from calibre.constants import filesystem_encoding
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.ipc.server import Server
@@ -273,245 +272,4 @@ def process_pages(pages, opts, update, tdir):
return ans, failures
-class ComicInput(InputFormatPlugin):
-
- name = 'Comic Input'
- author = 'Kovid Goyal'
- description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
- file_types = set(['cbz', 'cbr', 'cbc'])
- is_image_collection = True
- core_usage = -1
-
- options = set([
- OptionRecommendation(name='colors', recommended_value=256,
- help=_('Number of colors for grayscale image conversion. Default: '
- '%default. Values of less than 256 may result in blurred text '
- 'on your device if you are creating your comics in EPUB format.')),
- OptionRecommendation(name='dont_normalize', recommended_value=False,
- help=_('Disable normalize (improve contrast) color range '
- 'for pictures. Default: False')),
- OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
- help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
- OptionRecommendation(name='dont_sharpen', recommended_value=False,
- help=_('Disable sharpening.')),
- OptionRecommendation(name='disable_trim', recommended_value=False,
- help=_('Disable trimming of comic pages. For some comics, '
- 'trimming might remove content as well as borders.')),
- OptionRecommendation(name='landscape', recommended_value=False,
- help=_("Don't split landscape images into two portrait images")),
- OptionRecommendation(name='wide', recommended_value=False,
- help=_("Keep aspect ratio and scale image using screen height as "
- "image width for viewing in landscape mode.")),
- OptionRecommendation(name='right2left', recommended_value=False,
- help=_('Used for right-to-left publications like manga. '
- 'Causes landscape pages to be split into portrait pages '
- 'from right to left.')),
- OptionRecommendation(name='despeckle', recommended_value=False,
- help=_('Enable Despeckle. Reduces speckle noise. '
- 'May greatly increase processing time.')),
- OptionRecommendation(name='no_sort', recommended_value=False,
- help=_("Don't sort the files found in the comic "
- "alphabetically by name. Instead use the order they were "
- "added to the comic.")),
- OptionRecommendation(name='output_format', choices=['png', 'jpg'],
- recommended_value='png', help=_('The format that images in the created ebook '
- 'are converted to. You can experiment to see which format gives '
- 'you optimal size and look on your device.')),
- OptionRecommendation(name='no_process', recommended_value=False,
- help=_("Apply no processing to the image")),
- OptionRecommendation(name='dont_grayscale', recommended_value=False,
- help=_('Do not convert the image to grayscale (black and white)')),
- OptionRecommendation(name='comic_image_size', recommended_value=None,
- help=_('Specify the image size as widthxheight pixels. Normally,'
- ' an image size is automatically calculated from the output '
- 'profile, this option overrides it.')),
- OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
- help=_('When converting a CBC do not add links to each page to'
- ' the TOC. Note this only applies if the TOC has more than one'
- ' section')),
- ])
-
- recommendations = set([
- ('margin_left', 0, OptionRecommendation.HIGH),
- ('margin_top', 0, OptionRecommendation.HIGH),
- ('margin_right', 0, OptionRecommendation.HIGH),
- ('margin_bottom', 0, OptionRecommendation.HIGH),
- ('insert_blank_line', False, OptionRecommendation.HIGH),
- ('remove_paragraph_spacing', False, OptionRecommendation.HIGH),
- ('change_justification', 'left', OptionRecommendation.HIGH),
- ('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
- ('chapter', None, OptionRecommendation.HIGH),
- ('page_breaks_brefore', None, OptionRecommendation.HIGH),
- ('use_auto_toc', False, OptionRecommendation.HIGH),
- ('page_breaks_before', None, OptionRecommendation.HIGH),
- ('disable_font_rescaling', True, OptionRecommendation.HIGH),
- ('linearize_tables', False, OptionRecommendation.HIGH),
- ])
-
- def get_comics_from_collection(self, stream):
- from calibre.libunzip import extract as zipextract
- tdir = PersistentTemporaryDirectory('_comic_collection')
- zipextract(stream, tdir)
- comics = []
- with CurrentDir(tdir):
- if not os.path.exists('comics.txt'):
- raise ValueError((
- '%s is not a valid comic collection'
- ' no comics.txt was found in the file')
- %stream.name)
- raw = open('comics.txt', 'rb').read()
- if raw.startswith(codecs.BOM_UTF16_BE):
- raw = raw.decode('utf-16-be')[1:]
- elif raw.startswith(codecs.BOM_UTF16_LE):
- raw = raw.decode('utf-16-le')[1:]
- elif raw.startswith(codecs.BOM_UTF8):
- raw = raw.decode('utf-8')[1:]
- else:
- raw = raw.decode('utf-8')
- for line in raw.splitlines():
- line = line.strip()
- if not line:
- continue
- fname, title = line.partition(':')[0], line.partition(':')[-1]
- fname = fname.replace('#', '_')
- fname = os.path.join(tdir, *fname.split('/'))
- if not title:
- title = os.path.basename(fname).rpartition('.')[0]
- if os.access(fname, os.R_OK):
- comics.append([title, fname])
- if not comics:
- raise ValueError('%s has no comics'%stream.name)
- return comics
-
- def get_pages(self, comic, tdir2):
- tdir = extract_comic(comic)
- new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
- verbose=self.opts.verbose)
- thumbnail = None
- if not new_pages:
- raise ValueError('Could not find any pages in the comic: %s'
- %comic)
- if self.opts.no_process:
- n2 = []
- for page in new_pages:
- n2.append(os.path.join(tdir2, os.path.basename(page)))
- shutil.copyfile(page, n2[-1])
- new_pages = n2
- else:
- new_pages, failures = process_pages(new_pages, self.opts,
- self.report_progress, tdir2)
- if failures:
- self.log.warning('Could not process the following pages '
- '(run with --verbose to see why):')
- for f in failures:
- self.log.warning('\t', f)
- if not new_pages:
- raise ValueError('Could not find any valid pages in comic: %s'
- % comic)
- thumbnail = os.path.join(tdir2,
- 'thumbnail.'+self.opts.output_format.lower())
- if not os.access(thumbnail, os.R_OK):
- thumbnail = None
- return new_pages
-
- def get_images(self):
- return self._images
-
- def convert(self, stream, opts, file_ext, log, accelerators):
- from calibre.ebooks.metadata import MetaInformation
- from calibre.ebooks.metadata.opf2 import OPFCreator
- from calibre.ebooks.metadata.toc import TOC
-
- self.opts, self.log= opts, log
- if file_ext == 'cbc':
- comics_ = self.get_comics_from_collection(stream)
- else:
- comics_ = [['Comic', os.path.abspath(stream.name)]]
- stream.close()
- comics = []
- for i, x in enumerate(comics_):
- title, fname = x
- cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
- cdir = os.path.abspath(cdir)
- if not os.path.exists(cdir):
- os.makedirs(cdir)
- pages = self.get_pages(fname, cdir)
- if not pages: continue
- wrappers = self.create_wrappers(pages)
- comics.append((title, pages, wrappers))
-
- if not comics:
- raise ValueError('No comic pages found in %s'%stream.name)
-
- mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
- [_('Unknown')])
- opf = OPFCreator(os.path.abspath('.'), mi)
- entries = []
-
- def href(x):
- if len(comics) == 1: return os.path.basename(x)
- return '/'.join(x.split(os.sep)[-2:])
-
- for comic in comics:
- pages, wrappers = comic[1:]
- entries += [(w, None) for w in map(href, wrappers)] + \
- [(x, None) for x in map(href, pages)]
- opf.create_manifest(entries)
- spine = []
- for comic in comics:
- spine.extend(map(href, comic[2]))
- self._images = []
- for comic in comics:
- self._images.extend(comic[1])
- opf.create_spine(spine)
- toc = TOC()
- if len(comics) == 1:
- wrappers = comics[0][2]
- for i, x in enumerate(wrappers):
- toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
- play_order=i)
- else:
- po = 0
- for comic in comics:
- po += 1
- wrappers = comic[2]
- stoc = toc.add_item(href(wrappers[0]),
- None, comic[0], play_order=po)
- if not opts.dont_add_comic_pages_to_toc:
- for i, x in enumerate(wrappers):
- stoc.add_item(href(x), None,
- _('Page')+' %d'%(i+1), play_order=po)
- po += 1
- opf.set_toc(toc)
- m, n = open('metadata.opf', 'wb'), open('toc.ncx', 'wb')
- opf.render(m, n, 'toc.ncx')
- return os.path.abspath('metadata.opf')
-
- def create_wrappers(self, pages):
- from calibre.ebooks.oeb.base import XHTML_NS
- wrappers = []
- WRAPPER = textwrap.dedent('''\
-
-
- Page #%d
-
-
-
-
-

-
-
-
- ''')
- dir = os.path.dirname(pages[0])
- for i, page in enumerate(pages):
- wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
- page = os.path.join(dir, 'page_%d.xhtml'%(i+1))
- open(page, 'wb').write(wrapper)
- wrappers.append(page)
- return wrappers
diff --git a/src/calibre/ebooks/conversion/plugins/__init__.py b/src/calibre/ebooks/conversion/plugins/__init__.py
new file mode 100644
index 0000000000..dd9615356c
--- /dev/null
+++ b/src/calibre/ebooks/conversion/plugins/__init__.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+ print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+
+
diff --git a/src/calibre/ebooks/azw4/input.py b/src/calibre/ebooks/conversion/plugins/azw4_input.py
similarity index 84%
rename from src/calibre/ebooks/azw4/input.py
rename to src/calibre/ebooks/conversion/plugins/azw4_input.py
index 1ac7657342..6d2b2a917e 100644
--- a/src/calibre/ebooks/azw4/input.py
+++ b/src/calibre/ebooks/conversion/plugins/azw4_input.py
@@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.pdb.header import PdbHeaderReader
-from calibre.ebooks.azw4.reader import Reader
class AZW4Input(InputFormatPlugin):
@@ -19,6 +17,9 @@ class AZW4Input(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
+ from calibre.ebooks.pdb.header import PdbHeaderReader
+ from calibre.ebooks.azw4.reader import Reader
+
header = PdbHeaderReader(stream)
reader = Reader(header, stream, log, options)
opf = reader.extract_content(os.getcwd())
diff --git a/src/calibre/ebooks/chm/input.py b/src/calibre/ebooks/conversion/plugins/chm_input.py
similarity index 98%
rename from src/calibre/ebooks/chm/input.py
rename to src/calibre/ebooks/conversion/plugins/chm_input.py
index f36685bd91..a674735f1d 100644
--- a/src/calibre/ebooks/chm/input.py
+++ b/src/calibre/ebooks/conversion/plugins/chm_input.py
@@ -3,9 +3,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal ,' \
' and Alex Bramley .'
-import os, uuid
-
-from lxml import html
+import os
from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
@@ -77,7 +75,7 @@ class CHMInput(InputFormatPlugin):
def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
# use HTMLInput plugin to generate book
- from calibre.ebooks.html.input import HTMLInput
+ from calibre.customize.builtins import HTMLInput
opts.breadth_first = True
htmlinput = HTMLInput(None)
oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi)
@@ -85,6 +83,8 @@ class CHMInput(InputFormatPlugin):
def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
+ import uuid
+ from lxml import html
from calibre.ebooks.conversion.plumber import create_oebbook
from calibre.ebooks.oeb.base import DirContainer
oeb = create_oebbook(log, None, opts,
@@ -142,6 +142,7 @@ class CHMInput(InputFormatPlugin):
return oeb
def _create_html_root(self, hhcpath, log):
+ from lxml import html
hhcdata = self._read_file(hhcpath)
hhcroot = html.fromstring(hhcdata)
chapters = self._process_nodes(hhcroot)
diff --git a/src/calibre/ebooks/conversion/plugins/comic_input.py b/src/calibre/ebooks/conversion/plugins/comic_input.py
new file mode 100644
index 0000000000..77ae7d8086
--- /dev/null
+++ b/src/calibre/ebooks/conversion/plugins/comic_input.py
@@ -0,0 +1,259 @@
+from __future__ import with_statement
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+Based on ideas from comiclrf created by FangornUK.
+'''
+
+import shutil, textwrap, codecs, os
+
+from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre import CurrentDir
+from calibre.ptempfile import PersistentTemporaryDirectory
+
+class ComicInput(InputFormatPlugin):
+ # Input plugin for comic archives (.cbz, .cbr) and comic collections (.cbc).
+ # convert() writes metadata.opf and toc.ncx into the current directory and
+ # create_wrappers() writes one page_N.xhtml wrapper per page image.
+
+ name = 'Comic Input'
+ author = 'Kovid Goyal'
+ description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
+ file_types = set(['cbz', 'cbr', 'cbc'])
+ is_image_collection = True
+ # NOTE(review): the meaning of -1 is defined by InputFormatPlugin, which is
+ # not visible here; presumably "use all available cores" -- confirm.
+ core_usage = -1
+
+ # Plugin-specific options. Each entry carries its default value
+ # (recommended_value) and a translated help string.
+ options = set([
+ OptionRecommendation(name='colors', recommended_value=256,
+ help=_('Number of colors for grayscale image conversion. Default: '
+ '%default. Values of less than 256 may result in blurred text '
+ 'on your device if you are creating your comics in EPUB format.')),
+ OptionRecommendation(name='dont_normalize', recommended_value=False,
+ help=_('Disable normalize (improve contrast) color range '
+ 'for pictures. Default: False')),
+ OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
+ help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
+ OptionRecommendation(name='dont_sharpen', recommended_value=False,
+ help=_('Disable sharpening.')),
+ OptionRecommendation(name='disable_trim', recommended_value=False,
+ help=_('Disable trimming of comic pages. For some comics, '
+ 'trimming might remove content as well as borders.')),
+ OptionRecommendation(name='landscape', recommended_value=False,
+ help=_("Don't split landscape images into two portrait images")),
+ OptionRecommendation(name='wide', recommended_value=False,
+ help=_("Keep aspect ratio and scale image using screen height as "
+ "image width for viewing in landscape mode.")),
+ OptionRecommendation(name='right2left', recommended_value=False,
+ help=_('Used for right-to-left publications like manga. '
+ 'Causes landscape pages to be split into portrait pages '
+ 'from right to left.')),
+ OptionRecommendation(name='despeckle', recommended_value=False,
+ help=_('Enable Despeckle. Reduces speckle noise. '
+ 'May greatly increase processing time.')),
+ OptionRecommendation(name='no_sort', recommended_value=False,
+ help=_("Don't sort the files found in the comic "
+ "alphabetically by name. Instead use the order they were "
+ "added to the comic.")),
+ OptionRecommendation(name='output_format', choices=['png', 'jpg'],
+ recommended_value='png', help=_('The format that images in the created ebook '
+ 'are converted to. You can experiment to see which format gives '
+ 'you optimal size and look on your device.')),
+ OptionRecommendation(name='no_process', recommended_value=False,
+ help=_("Apply no processing to the image")),
+ OptionRecommendation(name='dont_grayscale', recommended_value=False,
+ help=_('Do not convert the image to grayscale (black and white)')),
+ OptionRecommendation(name='comic_image_size', recommended_value=None,
+ help=_('Specify the image size as widthxheight pixels. Normally,'
+ ' an image size is automatically calculated from the output '
+ 'profile, this option overrides it.')),
+ OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
+ help=_('When converting a CBC do not add links to each page to'
+ ' the TOC. Note this only applies if the TOC has more than one'
+ ' section')),
+ ])
+
+ # (option name, value, level) tuples, all at OptionRecommendation.HIGH:
+ # zero margins, left justification, and the chapter / page-break / auto-TOC /
+ # font-rescaling / table-linearizing options switched off or cleared.
+ # NOTE(review): 'page_breaks_brefore' looks like a misspelling of
+ # 'page_breaks_before', which is also listed -- confirm whether the
+ # misspelled entry has any effect and can be dropped.
+ recommendations = set([
+ ('margin_left', 0, OptionRecommendation.HIGH),
+ ('margin_top', 0, OptionRecommendation.HIGH),
+ ('margin_right', 0, OptionRecommendation.HIGH),
+ ('margin_bottom', 0, OptionRecommendation.HIGH),
+ ('insert_blank_line', False, OptionRecommendation.HIGH),
+ ('remove_paragraph_spacing', False, OptionRecommendation.HIGH),
+ ('change_justification', 'left', OptionRecommendation.HIGH),
+ ('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
+ ('chapter', None, OptionRecommendation.HIGH),
+ ('page_breaks_brefore', None, OptionRecommendation.HIGH),
+ ('use_auto_toc', False, OptionRecommendation.HIGH),
+ ('page_breaks_before', None, OptionRecommendation.HIGH),
+ ('disable_font_rescaling', True, OptionRecommendation.HIGH),
+ ('linearize_tables', False, OptionRecommendation.HIGH),
+ ])
+
+ def get_comics_from_collection(self, stream):
+     '''
+     Unpack a comic collection (.cbc) and list the comics it contains.
+
+     The archive in `stream` is extracted into a persistent temporary
+     directory. Its ``comics.txt`` index is read line by line; every
+     non-blank line has the form ``filename:title``. A ``#`` in the
+     filename is replaced by ``_`` and a missing title falls back to the
+     file's base name without extension. Entries whose file is not
+     readable are skipped silently.
+
+     :param stream: Open file-like object for the .cbc archive; its
+                    ``name`` attribute is used in error messages.
+     :return: List of ``[title, absolute_path]`` pairs, in index order.
+     :raises ValueError: If ``comics.txt`` is missing or no listed comic
+                         is readable.
+     '''
+     from calibre.libunzip import extract as zipextract
+     tdir = PersistentTemporaryDirectory('_comic_collection')
+     zipextract(stream, tdir)
+     comics = []
+     with CurrentDir(tdir):
+         if not os.path.exists('comics.txt'):
+             raise ValueError((
+                 '%s is not a valid comic collection'
+                 ' no comics.txt was found in the file')
+                     %stream.name)
+         # Close the index file deterministically instead of leaving the
+         # handle to the garbage collector.
+         with open('comics.txt', 'rb') as index:
+             raw = index.read()
+         # Decode according to the byte order mark; the [1:] drops the
+         # decoded BOM character. Without a BOM the data is read as UTF-8.
+         if raw.startswith(codecs.BOM_UTF16_BE):
+             raw = raw.decode('utf-16-be')[1:]
+         elif raw.startswith(codecs.BOM_UTF16_LE):
+             raw = raw.decode('utf-16-le')[1:]
+         elif raw.startswith(codecs.BOM_UTF8):
+             raw = raw.decode('utf-8')[1:]
+         else:
+             raw = raw.decode('utf-8')
+         for line in raw.splitlines():
+             line = line.strip()
+             if not line:
+                 continue
+             # Split once on the first ':'. The separator is bound to `sep`
+             # rather than `_`, which this module uses for translations.
+             fname, sep, title = line.partition(':')
+             fname = fname.replace('#', '_')
+             fname = os.path.join(tdir, *fname.split('/'))
+             if not title:
+                 title = os.path.basename(fname).rpartition('.')[0]
+             if os.access(fname, os.R_OK):
+                 comics.append([title, fname])
+     if not comics:
+         raise ValueError('%s has no comics'%stream.name)
+     return comics
+
+ def get_pages(self, comic, tdir2):
+     '''
+     Extract one comic and place its page images in `tdir2`.
+
+     With the ``no_process`` option the extracted pages are copied
+     unchanged into `tdir2`; otherwise they are run through
+     ``process_pages`` and any pages that could not be processed are
+     reported through ``self.log``.
+
+     :param comic: Path of the comic archive to extract.
+     :param tdir2: Directory that receives the resulting page images.
+     :return: List of page image paths inside `tdir2`.
+     :raises ValueError: If the comic has no pages, or none survive
+                         processing.
+     '''
+     from calibre.ebooks.comic.input import (extract_comic, process_pages,
+             find_pages)
+     tdir = extract_comic(comic)
+     new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
+             verbose=self.opts.verbose)
+     if not new_pages:
+         raise ValueError('Could not find any pages in the comic: %s'
+                 %comic)
+     if self.opts.no_process:
+         copied = []
+         for page in new_pages:
+             dest = os.path.join(tdir2, os.path.basename(page))
+             shutil.copyfile(page, dest)
+             copied.append(dest)
+         new_pages = copied
+     else:
+         new_pages, failures = process_pages(new_pages, self.opts,
+                 self.report_progress, tdir2)
+         if failures:
+             self.log.warning('Could not process the following pages '
+                 '(run with --verbose to see why):')
+             for f in failures:
+                 self.log.warning('\t', f)
+         if not new_pages:
+             raise ValueError('Could not find any valid pages in comic: %s'
+                     % comic)
+     # The former `thumbnail` local was computed and never used, so the
+     # path lookup and os.access() probe have been removed.
+     return new_pages
+
+ def get_images(self):
+     # Page image paths gathered by convert(); the attribute is only set
+     # there, so it does not exist before a conversion has run.
+     images = self._images
+     return images
+
+ def convert(self, stream, opts, file_ext, log, accelerators):
+     '''
+     Convert a comic (or comic collection) into an OPF package on disk.
+
+     Pages and their XHTML wrappers are written below the current
+     working directory (one ``comic_N`` sub-directory per comic when a
+     collection holds several). ``metadata.opf`` and ``toc.ncx`` are then
+     written to the current directory.
+
+     :param stream: Open input file; it is closed by this method and only
+                    its ``name`` is used afterwards.
+     :param opts: Conversion options, stored on ``self.opts``.
+     :param file_ext: Input extension; ``'cbc'`` selects collection mode.
+     :param log: Logger, stored on ``self.log``.
+     :param accelerators: Unused here.
+     :return: Absolute path of the generated ``metadata.opf``.
+     :raises ValueError: If no comic yields any pages.
+     '''
+     from calibre.ebooks.metadata import MetaInformation
+     from calibre.ebooks.metadata.opf2 import OPFCreator
+     from calibre.ebooks.metadata.toc import TOC
+
+     self.opts, self.log= opts, log
+     if file_ext == 'cbc':
+         comics_ = self.get_comics_from_collection(stream)
+     else:
+         comics_ = [['Comic', os.path.abspath(stream.name)]]
+     stream.close()
+     comics = []
+     for i, x in enumerate(comics_):
+         title, fname = x
+         # A single comic is unpacked straight into the working directory.
+         cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
+         cdir = os.path.abspath(cdir)
+         if not os.path.exists(cdir):
+             os.makedirs(cdir)
+         pages = self.get_pages(fname, cdir)
+         if not pages: continue
+         wrappers = self.create_wrappers(pages)
+         comics.append((title, pages, wrappers))
+
+     if not comics:
+         raise ValueError('No comic pages found in %s'%stream.name)
+
+     mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
+         [_('Unknown')])
+     opf = OPFCreator(os.path.abspath('.'), mi)
+     entries = []
+
+     def href(x):
+         # Single comic: bare file name. Several: 'comic_N/file'.
+         if len(comics) == 1: return os.path.basename(x)
+         return '/'.join(x.split(os.sep)[-2:])
+
+     for comic in comics:
+         pages, wrappers = comic[1:]
+         entries += [(w, None) for w in map(href, wrappers)] + \
+                 [(x, None) for x in map(href, pages)]
+     opf.create_manifest(entries)
+     spine = []
+     for comic in comics:
+         spine.extend(map(href, comic[2]))
+     self._images = []
+     for comic in comics:
+         self._images.extend(comic[1])
+     opf.create_spine(spine)
+     toc = TOC()
+     if len(comics) == 1:
+         wrappers = comics[0][2]
+         for i, x in enumerate(wrappers):
+             toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
+                     play_order=i)
+     else:
+         po = 0
+         for comic in comics:
+             po += 1
+             wrappers = comic[2]
+             stoc = toc.add_item(href(wrappers[0]),
+                     None, comic[0], play_order=po)
+             if not opts.dont_add_comic_pages_to_toc:
+                 for i, x in enumerate(wrappers):
+                     stoc.add_item(href(x), None,
+                             _('Page')+' %d'%(i+1), play_order=po)
+                     po += 1
+     opf.set_toc(toc)
+     # Close both output files before handing the path back, so the data
+     # is flushed to disk no matter how the interpreter collects garbage.
+     # The with blocks are nested because the module still carries the
+     # with_statement __future__ import for old interpreters.
+     with open('metadata.opf', 'wb') as m:
+         with open('toc.ncx', 'wb') as n:
+             opf.render(m, n, 'toc.ncx')
+     return os.path.abspath('metadata.opf')
+
+ def create_wrappers(self, pages):
+ # Write one XHTML wrapper file (page_N.xhtml, N starting at 1) per page
+ # image, next to the first page, and return the wrapper paths in order.
+ from calibre.ebooks.oeb.base import XHTML_NS
+ wrappers = []
+ # NOTE(review): the template below shows a single %d placeholder while
+ # four values are interpolated further down -- the markup appears to have
+ # been lost in this rendering of the patch; confirm against the real file.
+ WRAPPER = textwrap.dedent('''\
+
+
+ Page #%d
+
+
+
+
+

+
+
+
+ ''')
+ # All wrappers go into the directory of the first page. Note that `dir`
+ # shadows the builtin of the same name inside this method.
+ dir = os.path.dirname(pages[0])
+ for i, page in enumerate(pages):
+ wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
+ # `page` is rebound from the image path to the wrapper path here.
+ page = os.path.join(dir, 'page_%d.xhtml'%(i+1))
+ open(page, 'wb').write(wrapper)
+ wrappers.append(page)
+ return wrappers
+
diff --git a/src/calibre/ebooks/djvu/input.py b/src/calibre/ebooks/conversion/plugins/djvu_input.py
similarity index 98%
rename from src/calibre/ebooks/djvu/input.py
rename to src/calibre/ebooks/conversion/plugins/djvu_input.py
index 70dbf97f5d..936ef1a702 100644
--- a/src/calibre/ebooks/djvu/input.py
+++ b/src/calibre/ebooks/conversion/plugins/djvu_input.py
@@ -12,7 +12,6 @@ from subprocess import Popen, PIPE
from cStringIO import StringIO
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from calibre.ebooks.txt.processor import convert_basic
class DJVUInput(InputFormatPlugin):
@@ -28,6 +27,8 @@ class DJVUInput(InputFormatPlugin):
])
def convert(self, stream, options, file_ext, log, accelerators):
+ from calibre.ebooks.txt.processor import convert_basic
+
stdout = StringIO()
ppdjvu = True
# using djvutxt is MUCH faster, should make it an option
diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/conversion/plugins/epub_input.py
similarity index 98%
rename from src/calibre/ebooks/epub/input.py
rename to src/calibre/ebooks/conversion/plugins/epub_input.py
index c2cfedd7d4..47356dbd1f 100644
--- a/src/calibre/ebooks/epub/input.py
+++ b/src/calibre/ebooks/conversion/plugins/epub_input.py
@@ -3,11 +3,9 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import os, uuid
+import os
from itertools import cycle
-from lxml import etree
-
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
class EPUBInput(InputFormatPlugin):
@@ -30,6 +28,8 @@ class EPUBInput(InputFormatPlugin):
f.write(raw[1024:])
def process_encryption(self, encfile, opf, log):
+ from lxml import etree
+ import uuid
key = None
for item in opf.identifier_iter():
scheme = None
@@ -65,6 +65,7 @@ class EPUBInput(InputFormatPlugin):
return False
def rationalize_cover(self, opf, log):
+ from lxml import etree
guide_cover, guide_elem = None, None
for guide_elem in opf.iterguide():
if guide_elem.get('type', '').lower() == 'cover':
@@ -110,6 +111,7 @@ class EPUBInput(InputFormatPlugin):
renderer)
def find_opf(self):
+ from lxml import etree
def attr(n, attr):
for k, v in n.attrib.items():
if k.endswith(attr):
diff --git a/src/calibre/ebooks/fb2/input.py b/src/calibre/ebooks/conversion/plugins/fb2_input.py
similarity index 99%
rename from src/calibre/ebooks/fb2/input.py
rename to src/calibre/ebooks/conversion/plugins/fb2_input.py
index 147e940eb4..747f8f19d8 100644
--- a/src/calibre/ebooks/fb2/input.py
+++ b/src/calibre/ebooks/conversion/plugins/fb2_input.py
@@ -6,7 +6,6 @@ Convert .fb2 files to .lrf
"""
import os, re
from base64 import b64decode
-from lxml import etree
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import guess_type
@@ -38,6 +37,7 @@ class FB2Input(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
+ from lxml import etree
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
diff --git a/src/calibre/ebooks/conversion/plugins/html_input.py b/src/calibre/ebooks/conversion/plugins/html_input.py
new file mode 100644
index 0000000000..cfd2ebf8cf
--- /dev/null
+++ b/src/calibre/ebooks/conversion/plugins/html_input.py
@@ -0,0 +1,283 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+ print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import re, tempfile, os
+from functools import partial
+from itertools import izip
+from urllib import quote
+
+from calibre.constants import islinux, isbsd
+from calibre.customize.conversion import (InputFormatPlugin,
+ OptionRecommendation)
+from calibre.utils.localization import get_lang
+from calibre.utils.filenames import ascii_filename
+
+
+# Input plugin for HTML files and OPF packages (see file_types below).
+class HTMLInput(InputFormatPlugin):
+
+ name = 'HTML Input'
+ author = 'Kovid Goyal'
+ description = 'Convert HTML and OPF files to an OEB'
+ file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])
+
+ # Plugin specific conversion options, each recommended at the LOW level.
+ options = set([
+ OptionRecommendation(name='breadth_first',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Traverse links in HTML files breadth first. Normally, '
+ 'they are traversed depth first.'
+ )
+ ),
+
+ OptionRecommendation(name='max_levels',
+ recommended_value=5, level=OptionRecommendation.LOW,
+ help=_('Maximum levels of recursion when following links in '
+ 'HTML files. Must be non-negative. 0 implies that no '
+ 'links in the root HTML file are followed. Default is '
+ '%default.'
+ )
+ ),
+
+ # convert() rejects this option with a ValueError for non-OPF input.
+ OptionRecommendation(name='dont_package',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Normally this input plugin re-arranges all the input '
+ 'files into a standard folder hierarchy. Only use this option '
+ 'if you know what you are doing as it can result in various '
+ 'nasty side effects in the rest of the conversion pipeline.'
+ )
+ ),
+
+ ])
+
+    def convert(self, stream, opts, file_ext, log,
+            accelerators):
+        '''
+        Entry point of the plugin: turn the HTML or OPF input in ``stream``
+        into an OEB book.
+
+        When ``file_ext`` is ``'opf'`` the book is created directly by
+        ``calibre.ebooks.conversion.plumber.create_oebbook`` from
+        ``stream.name``. Otherwise metadata is read from the HTML itself
+        and, if the stream has a name, used to update the metadata derived
+        from the file name; the book is then built by
+        :meth:`create_oebbook`.
+
+        :raises ValueError: if ``opts.dont_package`` is set for non OPF input
+        '''
+        # Reset the cached result of is_case_sensitive() for this run
+        self._is_case_sensitive = None
+        basedir, fname = os.getcwd(), None
+        self.opts = opts
+
+        if hasattr(stream, 'name'):
+            basedir, fname = os.path.split(stream.name)
+
+        if file_ext == 'opf':
+            from calibre.ebooks.conversion.plumber import create_oebbook
+            return create_oebbook(log, stream.name, opts,
+                    encoding=opts.input_encoding)
+
+        if opts.dont_package:
+            raise ValueError('The --dont-package option is not supported for an HTML input file')
+
+        from calibre.ebooks.metadata.html import get_metadata
+        mi = get_metadata(stream)
+        if fname:
+            from calibre.ebooks.metadata.meta import metadata_from_filename
+            name_mi = metadata_from_filename(fname)
+            name_mi.smart_update(mi)
+            mi = name_mi
+        return self.create_oebbook(stream.name, basedir, opts, log, mi)
+
+    def is_case_sensitive(self, path):
+        '''
+        Return True if the filesystem containing ``path`` appears to be
+        case sensitive.
+
+        A previously computed answer stored in ``self._is_case_sensitive``
+        is returned as is. If ``path`` is empty or does not exist the
+        answer is ``islinux or isbsd`` and nothing is cached. Otherwise the
+        filesystem is considered case insensitive only when both the lower
+        and the upper cased variants of ``path`` exist, and that result is
+        cached.
+        '''
+        cached = getattr(self, '_is_case_sensitive', None)
+        if cached is not None:
+            return cached
+        if not (path and os.path.exists(path)):
+            return islinux or isbsd
+        self._is_case_sensitive = (not os.path.exists(path.lower())
+                or not os.path.exists(path.upper()))
+        return self._is_case_sensitive
+
+ # Build an OEB book from the HTML file at htmlpath and the files it links to.
+ #
+ # Steps, in order: create an unpopulated book, fill in its metadata from mi
+ # (adding defaults and a fresh uuid identifier), add every non binary file
+ # returned by get_filelist() to the manifest and spine, rewrite links in
+ # the HTML and in stylesheets through resource_adder(), and finally
+ # generate a table of contents from the spine items. Returns the book.
+ def create_oebbook(self, htmlpath, basedir, opts, log, mi):
+ import uuid
+ from calibre.ebooks.conversion.plumber import create_oebbook
+ from calibre.ebooks.oeb.base import (DirContainer,
+ rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
+ xpath)
+ from calibre import guess_type
+ from calibre.ebooks.oeb.transforms.metadata import \
+ meta_info_to_oeb_metadata
+ from calibre.ebooks.html.input import get_filelist
+ import cssutils, logging
+ cssutils.log.setLevel(logging.WARN)
+ self.OEB_STYLES = OEB_STYLES
+ # populate=False: the book starts empty and is filled in below
+ oeb = create_oebbook(log, None, opts, self,
+ encoding=opts.input_encoding, populate=False)
+ self.oeb = oeb
+
+ # Metadata: copy from mi, then supply defaults for anything missing
+ metadata = oeb.metadata
+ meta_info_to_oeb_metadata(mi, metadata, log)
+ if not metadata.language:
+ oeb.logger.warn(u'Language not specified')
+ metadata.add('language', get_lang().replace('_', '-'))
+ if not metadata.creator:
+ oeb.logger.warn('Creator not specified')
+ metadata.add('creator', self.oeb.translate(__('Unknown')))
+ if not metadata.title:
+ oeb.logger.warn('Title not specified')
+ metadata.add('title', self.oeb.translate(__('Unknown')))
+ bookid = str(uuid.uuid4())
+ metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
+ # NOTE(review): the loop looks for the first identifier that has an 'id'
+ # attribute but then assigns metadata.identifier[0], not ident -- confirm
+ # whether that is intended.
+ for ident in metadata.identifier:
+ if 'id' in ident.attrib:
+ self.oeb.uid = metadata.identifier[0]
+ break
+
+ # Add all non binary files found by get_filelist() to manifest and spine
+ filelist = get_filelist(htmlpath, basedir, opts, log)
+ filelist = [f for f in filelist if not f.is_binary]
+ htmlfile_map = {}
+ for f in filelist:
+ path = f.path
+ oeb.container = DirContainer(os.path.dirname(path), log,
+ ignore_opf=True)
+ bname = os.path.basename(path)
+ id, href = oeb.manifest.generate(id='html',
+ href=ascii_filename(bname))
+ htmlfile_map[path] = href
+ item = oeb.manifest.add(id, href, 'text/html')
+ item.html_input_href = bname
+ oeb.spine.add(item, True)
+
+ # added_resources maps a local path to the manifest href it was added
+ # under; paths are lower cased when is_case_sensitive() says the
+ # filesystem is case insensitive
+ self.added_resources = {}
+ self.log = log
+ self.log('Normalizing filename cases')
+ for path, href in htmlfile_map.items():
+ if not self.is_case_sensitive(path):
+ path = path.lower()
+ self.added_resources[path] = href
+ # Stash the imported helpers on self so resource_adder() can use them
+ self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
+ self.urldefrag = urldefrag
+ self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
+
+ self.log('Rewriting HTML links')
+ for f in filelist:
+ path = f.path
+ dpath = os.path.dirname(path)
+ oeb.container = DirContainer(dpath, log, ignore_opf=True)
+ item = oeb.manifest.hrefs[htmlfile_map[path]]
+ rewrite_links(item.data, partial(self.resource_adder, base=dpath))
+
+ # Rewrite url() references inside stylesheets; the base directory is the
+ # directory of the path the stylesheet was added from (None if unknown)
+ for item in oeb.manifest.values():
+ if item.media_type in self.OEB_STYLES:
+ dpath = None
+ for path, href in self.added_resources.items():
+ if href == item.href:
+ dpath = os.path.dirname(path)
+ break
+ cssutils.replaceUrls(item.data,
+ partial(self.resource_adder, base=dpath))
+
+ # Table of contents: collect the <title> of every linear spine item that
+ # has one, together with the text of its first h1..h5/strong element
+ # ('(unlabled)' if none has text)
+ toc = self.oeb.toc
+ self.oeb.auto_generated_toc = True
+ titles = []
+ headers = []
+ for item in self.oeb.spine:
+ if not item.linear: continue
+ html = item.data
+ title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
+ title = re.sub(r'\s+', ' ', title.strip())
+ if title:
+ titles.append(title)
+ headers.append('(unlabled)')
+ for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
+ expr = '/h:html/h:body//h:%s[position()=1]/text()'
+ header = ''.join(xpath(html, expr % tag))
+ header = re.sub(r'\s+', ' ', header.strip())
+ if header:
+ headers[-1] = header
+ break
+ # Fall back to the headers when the titles are not all distinct
+ use = titles
+ if len(titles) > len(set(titles)):
+ use = headers
+ # NOTE(review): titles/headers only have entries for linear items with a
+ # non empty title, yet they are zipped against the whole spine here, so
+ # labels and items may not line up -- confirm.
+ for title, item in izip(use, self.oeb.spine):
+ if not item.linear: continue
+ toc.add(title, item.href)
+
+ oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
+ return oeb
+
+    def link_to_local_path(self, link_, base=None):
+        '''
+        Map a link found in a document to a path on the local filesystem.
+
+        :param link_: The link. Anything that is not already ``unicode`` is
+            decoded as UTF-8 first.
+        :param base: Passed to ``Link`` as its base; the current working
+            directory is used when it is empty or None.
+        :return: ``(path, fragment)``. ``(None, None)`` is returned when the
+            link cannot be decoded, cannot be processed by ``Link``, has no
+            local path or has an empty path.
+        '''
+        from calibre.ebooks.html.input import Link
+        if not isinstance(link_, unicode):
+            try:
+                # 'strict' is the registered name of this error handler.
+                # The previous value, 'error', is not a valid handler name
+                # and only failed with a LookupError on undecodable input.
+                link_ = link_.decode('utf-8', 'strict')
+            except Exception:
+                self.log.warn('Failed to decode link %r. Ignoring'%link_)
+                return None, None
+        try:
+            l = Link(link_, base if base else os.getcwdu())
+        except Exception:
+            self.log.exception('Failed to process link: %r'%link_)
+            return None, None
+        if l.path is None:
+            # Not a local resource
+            return None, None
+        link = l.path.replace('/', os.sep).strip()
+        frag = l.fragment
+        if not link:
+            return None, None
+        return link, frag
+
+ # Link rewriting callback (used via partial() from create_oebbook): make
+ # sure the local file link_ points to is in the manifest and return the
+ # href it should be referenced by. link_ is returned unchanged when it does
+ # not resolve to a readable local file.
+ def resource_adder(self, link_, base=None):
+ link, frag = self.link_to_local_path(link_, base=base)
+ if link is None:
+ return link_
+ try:
+ if base and not os.path.isabs(link):
+ link = os.path.join(base, link)
+ link = os.path.abspath(link)
+ except:
+ return link_
+ if not os.access(link, os.R_OK):
+ return link_
+ if os.path.isdir(link):
+ self.log.warn(link_, 'is a link to a directory. Ignoring.')
+ return link_
+ # Keys of added_resources are lower cased on case insensitive filesystems.
+ # Note that the check is made against the temporary directory, not link.
+ if not self.is_case_sensitive(tempfile.gettempdir()):
+ link = link.lower()
+ if link not in self.added_resources:
+ bhref = os.path.basename(link)
+ id, href = self.oeb.manifest.generate(id='added',
+ href=bhref)
+ guessed = self.guess_type(href)[0]
+ media_type = guessed or self.BINARY_MIME
+ if media_type == 'text/plain':
+ # Unlike the other ignored cases this returns None, not link_
+ self.log.warn('Ignoring link to text file %r'%link_)
+ return None
+
+ self.oeb.log.debug('Added', link)
+ # Point the container at the directory of the file being added
+ self.oeb.container = self.DirContainer(os.path.dirname(link),
+ self.oeb.log, ignore_opf=True)
+ # Load into memory
+ item = self.oeb.manifest.add(id, href, media_type)
+ # bhref refers to an already existing file. The read() method of
+ # DirContainer will call unquote on it before trying to read the
+ # file, therefore we quote it here.
+ if isinstance(bhref, unicode):
+ bhref = bhref.encode('utf-8')
+ item.html_input_href = quote(bhref).decode('utf-8')
+ if guessed in self.OEB_STYLES:
+ # Stylesheets get css_import_handler(), bound to their directory
+ item.override_css_fetch = partial(
+ self.css_import_handler, os.path.dirname(link))
+ # presumably accessing .data forces the item to be read now, while the
+ # container still points at the right directory -- TODO confirm
+ item.data
+ self.added_resources[link] = href
+
+ # Re-attach the fragment, if any, to the manifest href
+ nlink = self.added_resources[link]
+ if frag:
+ nlink = '#'.join((nlink, frag))
+ return nlink
+
+    def css_import_handler(self, base, href):
+        '''
+        Fetch the stylesheet ``href`` (resolved against ``base`` by
+        :meth:`link_to_local_path`) from the local filesystem.
+
+        :return: ``(None, css_text)`` where ``css_text`` is the file decoded
+            as UTF-8 (undecodable bytes replaced) and run through
+            ``self.oeb.css_preprocessor``. ``(None, None)`` is returned if
+            the link is not a readable local file or if reading or
+            preprocessing it fails; the failure is logged.
+        '''
+        link, frag = self.link_to_local_path(href, base=base)
+        if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
+            return (None, None)
+        try:
+            # Use a context manager so the file handle is closed
+            # deterministically instead of being left to the garbage
+            # collector
+            with open(link, 'rb') as f:
+                raw = f.read().decode('utf-8', 'replace')
+            raw = self.oeb.css_preprocessor(raw, add_namespace=True)
+        except Exception:
+            self.log.exception('Failed to read CSS file: %r'%link)
+            return (None, None)
+        return (None, raw)
diff --git a/src/calibre/ebooks/htmlz/input.py b/src/calibre/ebooks/conversion/plugins/htmlz_input.py
similarity index 96%
rename from src/calibre/ebooks/htmlz/input.py
rename to src/calibre/ebooks/conversion/plugins/htmlz_input.py
index f0f45f72fe..e9fbb1d7c2 100644
--- a/src/calibre/ebooks/htmlz/input.py
+++ b/src/calibre/ebooks/conversion/plugins/htmlz_input.py
@@ -10,9 +10,6 @@ import os
from calibre import guess_type
from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.chardet import xml_to_unicode
-from calibre.ebooks.metadata.opf2 import OPF
-from calibre.utils.zipfile import ZipFile
class HTMLZInput(InputFormatPlugin):
@@ -23,6 +20,10 @@ class HTMLZInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
+ from calibre.ebooks.chardet import xml_to_unicode
+ from calibre.ebooks.metadata.opf2 import OPF
+ from calibre.utils.zipfile import ZipFile
+
self.log = log
html = u''
top_levels = []
diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/conversion/plugins/lit_input.py
similarity index 100%
rename from src/calibre/ebooks/lit/input.py
rename to src/calibre/ebooks/conversion/plugins/lit_input.py
diff --git a/src/calibre/ebooks/conversion/plugins/lrf_input.py b/src/calibre/ebooks/conversion/plugins/lrf_input.py
new file mode 100644
index 0000000000..63af39e1e0
--- /dev/null
+++ b/src/calibre/ebooks/conversion/plugins/lrf_input.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import os, sys
+from calibre.customize.conversion import InputFormatPlugin
+
+class LRFInput(InputFormatPlugin):
+    '''
+    Input plugin for LRF files. The LRF is first turned into an XML
+    document which is then transformed with the ``templates/lrf.xsl``
+    stylesheet.
+    '''
+
+    name        = 'LRF Input'
+    author      = 'Kovid Goyal'
+    description = 'Convert LRF files to HTML'
+    file_types  = set(['lrf'])
+
+    def convert(self, stream, options, file_ext, log,
+            accelerators):
+        '''
+        Convert the LRF file in ``stream``.
+
+        The result of the XSLT transform is written to ``content.opf`` in
+        the current directory and the absolute path to that file is
+        returned. When ``options.verbose > 2`` the intermediate XML is also
+        dumped to ``lrs.xml``.
+        '''
+        from lxml import etree
+        from calibre.ebooks.lrf.input import (MediaType, Styles, TextBlock,
+                Canvas, ImageBlock, RuledLine)
+        self.log = log
+        self.log('Generating XML')
+        from calibre.ebooks.lrf.lrfparser import LRFDocument
+        d = LRFDocument(stream)
+        d.parse()
+        xml = d.to_xml(write_files=True)
+        if options.verbose > 2:
+            # Close the debug dump explicitly rather than leaking the handle
+            with open('lrs.xml', 'wb') as f:
+                f.write(xml.encode('utf-8'))
+        parser = etree.XMLParser(no_network=True, huge_tree=True)
+        try:
+            doc = etree.fromstring(xml, parser=parser)
+        except Exception:
+            # Retry with a parser that recovers from malformed XML
+            self.log.warn('Failed to parse XML. Trying to recover')
+            parser = etree.XMLParser(no_network=True, huge_tree=True,
+                                    recover=True)
+            doc = etree.fromstring(xml, parser=parser)
+
+        # Map the refobj of every CharButton to the '<refpage>.xhtml#<refobj>'
+        # target of the JumpTo found under the object it refers to
+        char_button_map = {}
+        for x in doc.xpath('//CharButton[@refobj]'):
+            ro = x.get('refobj')
+            jump_button = doc.xpath('//*[@objid="%s"]'%ro)
+            if jump_button:
+                jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
+                if jump_to:
+                    char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
+                            jump_to[0].get('refobj'))
+
+        # Map the refobj of every Plot to the file of the ImageStream that
+        # the referenced Image points to
+        plot_map = {}
+        for x in doc.xpath('//Plot[@refobj]'):
+            ro = x.get('refobj')
+            image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
+            if image:
+                imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
+                    image[0].get('refstream'))
+                if imgstr:
+                    plot_map[ro] = imgstr[0].get('file')
+
+        self.log('Converting XML to HTML...')
+        styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
+        media_type = MediaType()
+        styles = Styles()
+        text_block = TextBlock(styles, char_button_map, plot_map, log)
+        canvas = Canvas(doc, styles, text_block, log)
+        image_block = ImageBlock(canvas)
+        ruled_line = RuledLine()
+        # XSLT extension elements made available to the stylesheet
+        extensions = {
+                ('calibre', 'media-type') : media_type,
+                ('calibre', 'text-block') : text_block,
+                ('calibre', 'ruled-line') : ruled_line,
+                ('calibre', 'styles')     : styles,
+                ('calibre', 'canvas')     : canvas,
+                ('calibre', 'image-block'): image_block,
+                }
+        transform = etree.XSLT(styledoc, extensions=extensions)
+        try:
+            result = transform(doc)
+        except RuntimeError:
+            # Retry once with a higher recursion limit
+            sys.setrecursionlimit(5000)
+            result = transform(doc)
+
+        with open('content.opf', 'wb') as f:
+            f.write(result)
+        styles.write()
+        return os.path.abspath('content.opf')
diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/conversion/plugins/mobi_input.py
similarity index 100%
rename from src/calibre/ebooks/mobi/input.py
rename to src/calibre/ebooks/conversion/plugins/mobi_input.py
diff --git a/src/calibre/ebooks/conversion/plugins/odt_input.py b/src/calibre/ebooks/conversion/plugins/odt_input.py
new file mode 100644
index 0000000000..5e92ea5163
--- /dev/null
+++ b/src/calibre/ebooks/conversion/plugins/odt_input.py
@@ -0,0 +1,25 @@
+from __future__ import with_statement
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+Convert an ODT file into a Open Ebook
+'''
+
+from calibre.customize.conversion import InputFormatPlugin
+
+class ODTInput(InputFormatPlugin):
+    '''Input plugin for ODT files; the work is done by ``Extract``.'''
+
+    name        = 'ODT Input'
+    author      = 'Kovid Goyal'
+    description = 'Convert ODT (OpenOffice) files to HTML'
+    file_types  = set(['odt'])
+
+    def convert(self, stream, options, file_ext, log,
+            accelerators):
+        '''
+        Run ``calibre.ebooks.odt.input.Extract`` on ``stream`` with ``'.'``
+        as its second argument and return whatever it returns.
+        '''
+        # Imported lazily so loading the plugin does not load the ODT code
+        from calibre.ebooks.odt.input import Extract
+        extractor = Extract()
+        return extractor(stream, '.', log)
+
+
diff --git a/src/calibre/ebooks/pdb/input.py b/src/calibre/ebooks/conversion/plugins/pdb_input.py
similarity index 87%
rename from src/calibre/ebooks/pdb/input.py
rename to src/calibre/ebooks/conversion/plugins/pdb_input.py
index cd861216af..69984ab268 100644
--- a/src/calibre/ebooks/pdb/input.py
+++ b/src/calibre/ebooks/conversion/plugins/pdb_input.py
@@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.pdb.header import PdbHeaderReader
-from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
class PDBInput(InputFormatPlugin):
@@ -19,6 +17,9 @@ class PDBInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
+ from calibre.ebooks.pdb.header import PdbHeaderReader
+ from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
+
header = PdbHeaderReader(stream)
Reader = get_reader(header.ident)
diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/conversion/plugins/pdf_input.py
similarity index 95%
rename from src/calibre/ebooks/pdf/input.py
rename to src/calibre/ebooks/conversion/plugins/pdf_input.py
index 51f44ba502..0a3821c584 100644
--- a/src/calibre/ebooks/pdf/input.py
+++ b/src/calibre/ebooks/conversion/plugins/pdf_input.py
@@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from calibre.ebooks.pdf.pdftohtml import pdftohtml
-from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.constants import plugins
pdfreflow, pdfreflow_err = plugins['pdfreflow']
@@ -43,6 +41,9 @@ class PDFInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
+ from calibre.ebooks.metadata.opf2 import OPFCreator
+ from calibre.ebooks.pdf.pdftohtml import pdftohtml
+
log.debug('Converting file to html...')
# The main html file will be named index.html
self.opts, self.log = options, log
diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/conversion/plugins/pml_input.py
similarity index 96%
rename from src/calibre/ebooks/pml/input.py
rename to src/calibre/ebooks/conversion/plugins/pml_input.py
index 4d59668b12..1351a5c492 100644
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/conversion/plugins/pml_input.py
@@ -11,9 +11,6 @@ import shutil
from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
-from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
-from calibre.ebooks.metadata.toc import TOC
-from calibre.ebooks.metadata.opf2 import OPFCreator
class PMLInput(InputFormatPlugin):
@@ -24,6 +21,8 @@ class PMLInput(InputFormatPlugin):
file_types = set(['pml', 'pmlz'])
def process_pml(self, pml_path, html_path, close_all=False):
+ from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
+
pclose = False
hclose = False
@@ -85,6 +84,9 @@ class PMLInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
+ from calibre.ebooks.metadata.toc import TOC
+ from calibre.ebooks.metadata.opf2 import OPFCreator
+
self.options = options
self.log = log
pages, images = [], []
diff --git a/src/calibre/ebooks/rb/input.py b/src/calibre/ebooks/conversion/plugins/rb_input.py
similarity index 91%
rename from src/calibre/ebooks/rb/input.py
rename to src/calibre/ebooks/conversion/plugins/rb_input.py
index 8b05c1d42e..6a6ca3205a 100644
--- a/src/calibre/ebooks/rb/input.py
+++ b/src/calibre/ebooks/conversion/plugins/rb_input.py
@@ -6,7 +6,6 @@ __docformat__ = 'restructuredtext en'
import os
-from calibre.ebooks.rb.reader import Reader
from calibre.customize.conversion import InputFormatPlugin
class RBInput(InputFormatPlugin):
@@ -18,6 +17,8 @@ class RBInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
+ from calibre.ebooks.rb.reader import Reader
+
reader = Reader(stream, log, options.input_encoding)
opf = reader.extract_content(os.getcwd())
diff --git a/src/calibre/web/feeds/input.py b/src/calibre/ebooks/conversion/plugins/recipe_input.py
similarity index 100%
rename from src/calibre/web/feeds/input.py
rename to src/calibre/ebooks/conversion/plugins/recipe_input.py
diff --git a/src/calibre/ebooks/conversion/plugins/rtf_input.py b/src/calibre/ebooks/conversion/plugins/rtf_input.py
new file mode 100644
index 0000000000..91c285c10c
--- /dev/null
+++ b/src/calibre/ebooks/conversion/plugins/rtf_input.py
@@ -0,0 +1,298 @@
+from __future__ import with_statement
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal '
+
+import os, glob, re, textwrap
+
+from calibre.customize.conversion import InputFormatPlugin
+
+# Maps the values of the border-cell-*-style attributes read in
+# RTFInput.convert_borders() to CSS border-style keywords. Names missing
+# from this table are rendered as 'solid' there.
+border_style_map = {
+ 'single' : 'solid',
+ 'double-thickness-border' : 'double',
+ 'shadowed-border': 'outset',
+ 'double-border': 'double',
+ 'dotted-border': 'dotted',
+ 'dashed': 'dashed',
+ 'hairline': 'solid',
+ 'inset': 'inset',
+ 'dash-small': 'dashed',
+ 'dot-dash': 'dotted',
+ 'dot-dot-dash': 'dotted',
+ 'outset': 'outset',
+ # NOTE(review): both spellings are present; presumably 'tripple' is a
+ # spelling that occurs in the input -- confirm before removing it
+ 'tripple': 'double',
+ 'triple': 'double',
+ 'thick-thin-small': 'solid',
+ 'thin-thick-small': 'solid',
+ 'thin-thick-thin-small': 'solid',
+ 'thick-thin-medium': 'solid',
+ 'thin-thick-medium': 'solid',
+ 'thin-thick-thin-medium': 'solid',
+ 'thick-thin-large': 'solid',
+ 'thin-thick-thin-large': 'solid',
+ 'wavy': 'ridge',
+ 'double-wavy': 'ridge',
+ 'striped': 'ridge',
+ 'emboss': 'inset',
+ 'engrave': 'inset',
+ 'frame': 'ridge',
+}
+
+
+# Input plugin for RTF files. convert() goes RTF -> XML (generate_xml) ->
+# XHTML (XSLT with templates/rtf.xsl) and returns the path to an OPF.
+class RTFInput(InputFormatPlugin):
+
+ name = 'RTF Input'
+ author = 'Kovid Goyal'
+ description = 'Convert RTF files to HTML'
+ file_types = set(['rtf'])
+
+    def generate_xml(self, stream):
+        '''
+        Run ``ParseRtf`` on ``stream`` (passed through as ``in_file``) and
+        return the raw contents of the XML file it writes, ``dataxml.xml``.
+
+        If ``self.opts.debug_pipeline`` is set, the parser is run at run
+        level 4 with indented output and ``rtfdebug`` as its debug
+        directory, provided that directory can be created.
+        '''
+        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
+        out_path = 'dataxml.xml'
+        run_level, debug_dir, indent = 1, None, 0
+        debug_requested = getattr(self.opts, 'debug_pipeline', None) is not None
+        if debug_requested:
+            try:
+                os.mkdir('rtfdebug')
+                debug_dir, run_level, indent = 'rtfdebug', 4, 1
+                self.log('Running RTFParser in debug mode')
+            except:
+                self.log.warn('Impossible to run RTFParser in debug mode')
+
+        settings = dict(
+            in_file=stream,
+            out_file=out_path,
+            # Symbol, Zapf and Wingding fonts are converted to their
+            # unicode equivalents (1 is the default for all three)
+            convert_symbol=1,
+            convert_zapf=1,
+            convert_wingdings=1,
+            # RTF caps become real caps (default 1)
+            convert_caps=1,
+            # Whether to indent the resulting XML (default 0, no indent)
+            indent=indent,
+            # Form lists from RTF (default 1)
+            form_lists=1,
+            # Convert headings to sections (default 0)
+            headings_to_sections=1,
+            # Group paragraphs with the same style name (default 1)
+            group_styles=1,
+            # Group borders (default 1)
+            group_borders=1,
+            # Write or do not write paragraphs (default 0)
+            empty_paragraphs=1,
+            # Debugging
+            deb_dir=debug_dir,
+            run_level=run_level,
+        )
+        ParseRtf(**settings).parse_rtf()
+        with open(out_path, 'rb') as xml_file:
+            return xml_file.read()
+
+    def extract_images(self, picts):
+        '''
+        Extract the images embedded in the file ``picts``.
+
+        Every ``{\\pict ...}`` group is reduced to its hex digits, decoded
+        and written to ``NNNN.<fmt>`` in the current directory, where
+        ``fmt`` is what ``imghdr`` detects or ``wmf`` if it detects nothing.
+        Returns the result of :meth:`convert_images` on the map from the
+        1-based image number to the written file name.
+        '''
+        import imghdr
+        self.log('Extracting images...')
+
+        with open(picts, 'rb') as src:
+            raw = src.read()
+        non_hex = re.compile(r'[^a-fA-F0-9]')
+        payloads = [non_hex.sub('', group)
+                for group in re.findall(r'\{\\pict([^}]+)\}', raw)
+                if len(group)]
+
+        imap = {}
+        for number, enc in enumerate(payloads, 1):
+            # Hex data must have an even number of digits to be decodable
+            if len(enc) % 2 == 1:
+                enc = enc[:-1]
+            data = enc.decode('hex')
+            fmt = imghdr.what(None, data)
+            if fmt is None:
+                fmt = 'wmf'
+            name = '%04d.%s' % (number, fmt)
+            with open(name, 'wb') as out:
+                out.write(data)
+            imap[number] = name
+        return self.convert_images(imap)
+
+    def convert_images(self, imap):
+        '''
+        Replace every file name in ``imap`` with the result of
+        :meth:`convert_image`, in place, and return ``imap``. Entries whose
+        conversion raises are logged and left untouched.
+        '''
+        # Placeholder image, created on demand by replace_wmf()
+        self.default_img = None
+        for number, fname in imap.iteritems():
+            try:
+                converted = self.convert_image(fname)
+            except:
+                self.log.exception('Failed to convert', fname)
+            else:
+                imap[number] = converted
+        return imap
+
+    def convert_image(self, name):
+        '''
+        Return the name of a usable image for ``name``. Only ``.wmf`` files
+        are touched: they are rasterized, or, if that fails, replaced by a
+        placeholder image. All other names are returned unchanged.
+        '''
+        if name.endswith('.wmf'):
+            try:
+                return self.rasterize_wmf(name)
+            except:
+                self.log.exception('Failed to convert WMF image %r'%name)
+            return self.replace_wmf(name)
+        return name
+
+    def replace_wmf(self, name):
+        '''
+        Write a placeholder image in place of the WMF file ``name`` and
+        return the placeholder's file name (``.wmf`` replaced by ``.jpg``).
+        The placeholder is generated once with ``calibre_cover`` and cached
+        in ``self.default_img``.
+        '''
+        from calibre.ebooks import calibre_cover
+        if self.default_img is None:
+            heading = 'Conversion of WMF images is not supported'
+            advice = ('Use Microsoft Word or OpenOffice to save this RTF file'
+                    ' as HTML and convert that in calibre.')
+            self.default_img = calibre_cover(heading, advice, title_size=36,
+                    author_size=20)
+        placeholder = name.replace('.wmf', '.jpg')
+        with open(placeholder, 'wb') as out:
+            out.write(self.default_img)
+        return placeholder
+
+    def rasterize_wmf(self, name):
+        '''
+        Pass the contents of the WMF file ``name`` through ``wmf_unwrap``
+        and write the result next to it with ``.wmf`` replaced by ``.png``.
+        Returns the name of the file written.
+        '''
+        from calibre.utils.wmf.parse import wmf_unwrap
+        with open(name, 'rb') as src:
+            unwrapped = wmf_unwrap(src.read())
+        png_name = name.replace('.wmf', '.png')
+        with open(png_name, 'wb') as dest:
+            dest.write(unwrapped)
+        return png_name
+
+
+ # Append the CSS for inline formatting to styles.css: a fixed set of span
+ # classes, one span.fsN class per entry of ic.font_sizes, one span.colN
+ # class per entry of ic.colors and one class per entry of border_styles
+ # (a mapping of class name to CSS declarations, see convert_borders()).
+ def write_inline_css(self, ic, border_styles):
+ font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
+ enumerate(ic.font_sizes)]
+ color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
+ enumerate(ic.colors)]
+ css = textwrap.dedent('''
+ span.none {
+ text-decoration: none; font-weight: normal;
+ font-style: normal; font-variant: normal
+ }
+
+ span.italics { font-style: italic }
+
+ span.bold { font-weight: bold }
+
+ span.small-caps { font-variant: small-caps }
+
+ span.underlined { text-decoration: underline }
+
+ span.strike-through { text-decoration: line-through }
+
+ ''')
+ css += '\n'+'\n'.join(font_size_classes)
+ css += '\n' +'\n'.join(color_classes)
+
+ for cls, val in border_styles.iteritems():
+ css += '\n\n.%s {\n%s\n}'%(cls, val)
+
+ # Opened in append mode, so existing content of styles.css is kept
+ with open('styles.css', 'ab') as f:
+ f.write(css)
+
+    def convert_borders(self, doc):
+        '''
+        Give every ``cell`` element in ``doc`` a ``class`` attribute that
+        describes its borders and return a dict mapping each class name
+        (``border_styleN``) to its CSS declarations.
+
+        Cells with identical declarations share a class; ``N`` is the order
+        in which a declaration set was first seen.
+        '''
+        known_styles = []
+        class_to_css = {}
+        for cell in doc.xpath(r'//*[local-name()="cell"]'):
+            # Defaults, overridden per side by the declarations added below
+            decls = ['border-style: hidden', 'border-width: 1px',
+                    'border-color: black']
+            for side in ('bottom', 'top', 'left', 'right'):
+                rtf_style = cell.get('border-cell-%s-style'%side, None)
+                if rtf_style:
+                    decls.append('border-%s-style: %s'%(side,
+                        border_style_map.get(rtf_style, 'solid')))
+                width = cell.get('border-cell-%s-line-width'%side, None)
+                if width:
+                    decls.append('border-%s-width: %spt'%(side, width))
+                color = cell.get('border-cell-%s-color'%side, None)
+                if color:
+                    decls.append('border-%s-color: %s'%(side, color))
+            css = ';\n'.join(decls)
+            if css not in known_styles:
+                known_styles.append(css)
+            cls = 'border_style%d'%known_styles.index(css)
+            class_to_css[cls] = css
+            cell.set('class', cls)
+        return class_to_css
+
+    def convert(self, stream, options, file_ext, log,
+                accelerators):
+        '''
+        Convert the RTF file in ``stream``.
+
+        The RTF is converted to XML by :meth:`generate_xml`, embedded images
+        are extracted, and the XML is transformed to ``index.xhtml`` with
+        the ``templates/rtf.xsl`` stylesheet. Inline and border styles are
+        appended to ``styles.css`` and a ``metadata.opf`` referencing
+        ``index.xhtml`` is written.
+
+        :return: absolute path to ``metadata.opf``
+        :raises ValueError: if the RTF parser raises
+            ``RtfInvalidCodeException``
+        '''
+        from lxml import etree
+        from calibre.ebooks.metadata.meta import get_metadata
+        from calibre.ebooks.metadata.opf2 import OPFCreator
+        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
+        from calibre.ebooks.rtf.input import InlineClass
+        self.opts = options
+        self.log = log
+        self.log('Converting RTF to XML...')
+        try:
+            xml = self.generate_xml(stream.name)
+        except RtfInvalidCodeException as e:
+            raise ValueError(_('This RTF file has a feature calibre does not '
+            'support. Convert it to HTML first and then try it.\n%s')%e)
+
+        # imap must exist even when no picture directory was produced,
+        # otherwise the lookup in the pict loop below raises a NameError
+        imap = {}
+        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
+        if d:
+            try:
+                imap = self.extract_images(d[0])
+            except Exception:
+                self.log.exception('Failed to extract images...')
+
+        self.log('Parsing XML...')
+        parser = etree.XMLParser(recover=True, no_network=True)
+        doc = etree.fromstring(xml, parser=parser)
+        border_styles = self.convert_borders(doc)
+        # Replace the number of every extracted picture with its file name
+        for pict in doc.xpath('//rtf:pict[@num]',
+                namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
+            num = int(pict.get('num'))
+            name = imap.get(num, None)
+            if name is not None:
+                pict.set('num', name)
+
+        self.log('Converting XML to HTML...')
+        inline_class = InlineClass(self.log)
+        styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
+        extensions = { ('calibre', 'inline-class') : inline_class }
+        transform = etree.XSLT(styledoc, extensions=extensions)
+        result = transform(doc)
+        html = 'index.xhtml'
+        with open(html, 'wb') as f:
+            res = transform.tostring(result)
+            # Collapse runs of newlines into a single newline
+            res = re.sub('\n+', '\n', res)
+            f.write(res)
+        self.write_inline_css(inline_class, border_styles)
+        stream.seek(0)
+        mi = get_metadata(stream, 'rtf')
+        if not mi.title:
+            mi.title = _('Unknown')
+        if not mi.authors:
+            mi.authors = [_('Unknown')]
+        opf = OPFCreator(os.getcwd(), mi)
+        opf.create_manifest([('index.xhtml', None)])
+        opf.create_spine(['index.xhtml'])
+        # Close the OPF file explicitly instead of leaking the handle
+        with open('metadata.opf', 'wb') as opf_file:
+            opf.render(opf_file)
+        return os.path.abspath('metadata.opf')
+
+
diff --git a/src/calibre/ebooks/snb/input.py b/src/calibre/ebooks/conversion/plugins/snb_input.py
similarity index 97%
rename from src/calibre/ebooks/snb/input.py
rename to src/calibre/ebooks/conversion/plugins/snb_input.py
index 13b1ca45f9..ae3ab0033c 100755
--- a/src/calibre/ebooks/snb/input.py
+++ b/src/calibre/ebooks/conversion/plugins/snb_input.py
@@ -4,13 +4,11 @@ __license__ = 'GPL 3'
__copyright__ = '2010, Li Fanxi '
__docformat__ = 'restructuredtext en'
-import os, uuid
+import os
from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.snb.snbfile import SNBFile
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.filenames import ascii_filename
-from lxml import etree
HTML_TEMPLATE = u'%s\n%s\n'
@@ -29,7 +27,12 @@ class SNBInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
+ import uuid
+ from lxml import etree
+
from calibre.ebooks.oeb.base import DirContainer
+ from calibre.ebooks.snb.snbfile import SNBFile
+
log.debug("Parsing SNB file...")
snbFile = SNBFile()
try:
diff --git a/src/calibre/ebooks/tcr/input.py b/src/calibre/ebooks/conversion/plugins/tcr_input.py
similarity index 94%
rename from src/calibre/ebooks/tcr/input.py
rename to src/calibre/ebooks/conversion/plugins/tcr_input.py
index 4d15fd0923..de4f3f5f40 100644
--- a/src/calibre/ebooks/tcr/input.py
+++ b/src/calibre/ebooks/conversion/plugins/tcr_input.py
@@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
from cStringIO import StringIO
from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.compression.tcr import decompress
class TCRInput(InputFormatPlugin):
@@ -17,6 +16,8 @@ class TCRInput(InputFormatPlugin):
file_types = set(['tcr'])
def convert(self, stream, options, file_ext, log, accelerators):
+ from calibre.ebooks.compression.tcr import decompress
+
log.info('Decompressing text...')
raw_txt = decompress(stream)
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/conversion/plugins/txt_input.py
similarity index 94%
rename from src/calibre/ebooks/txt/input.py
rename to src/calibre/ebooks/conversion/plugins/txt_input.py
index 49c8a2129d..e916b30c29 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/conversion/plugins/txt_input.py
@@ -8,14 +8,6 @@ import os
from calibre import _ent_pat, walk, xml_entity_to_unicode
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
-from calibre.ebooks.chardet import detect
-from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
- separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
- preserve_spaces, detect_paragraph_type, detect_formatting_type, \
- normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \
- separate_hard_scene_breaks
-from calibre.utils.zipfile import ZipFile
class TXTInput(InputFormatPlugin):
@@ -61,6 +53,17 @@ class TXTInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
+ from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
+ from calibre.ebooks.chardet import detect
+ from calibre.utils.zipfile import ZipFile
+ from calibre.ebooks.txt.processor import (convert_basic,
+ convert_markdown, separate_paragraphs_single_line,
+ separate_paragraphs_print_formatted, preserve_spaces,
+ detect_paragraph_type, detect_formatting_type,
+ normalize_line_endings, convert_textile, remove_indents,
+ block_to_single_line, separate_hard_scene_breaks)
+
+
self.log = log
txt = ''
log.debug('Reading text from file...')
diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py
index d303dd66a5..6cacb34edc 100644
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@@ -11,19 +11,13 @@ __docformat__ = 'restructuredtext en'
Input plugin for HTML or OPF ebooks.
'''
-import os, re, sys, uuid, tempfile, errno as gerrno
+import os, re, sys, errno as gerrno
from urlparse import urlparse, urlunparse
-from urllib import unquote, quote
-from functools import partial
-from itertools import izip
+from urllib import unquote
-from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import detect_xml_encoding
-from calibre.customize.conversion import OptionRecommendation
-from calibre.constants import islinux, isbsd, iswindows
+from calibre.constants import iswindows
from calibre import unicode_path, as_unicode
-from calibre.utils.localization import get_lang
-from calibre.utils.filenames import ascii_filename
class Link(object):
'''
@@ -241,262 +235,4 @@ def get_filelist(htmlfile, dir, opts, log):
return filelist
-class HTMLInput(InputFormatPlugin):
- name = 'HTML Input'
- author = 'Kovid Goyal'
- description = 'Convert HTML and OPF files to an OEB'
- file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])
-
- options = set([
- OptionRecommendation(name='breadth_first',
- recommended_value=False, level=OptionRecommendation.LOW,
- help=_('Traverse links in HTML files breadth first. Normally, '
- 'they are traversed depth first.'
- )
- ),
-
- OptionRecommendation(name='max_levels',
- recommended_value=5, level=OptionRecommendation.LOW,
- help=_('Maximum levels of recursion when following links in '
- 'HTML files. Must be non-negative. 0 implies that no '
- 'links in the root HTML file are followed. Default is '
- '%default.'
- )
- ),
-
- OptionRecommendation(name='dont_package',
- recommended_value=False, level=OptionRecommendation.LOW,
- help=_('Normally this input plugin re-arranges all the input '
- 'files into a standard folder hierarchy. Only use this option '
- 'if you know what you are doing as it can result in various '
- 'nasty side effects in the rest of the conversion pipeline.'
- )
- ),
-
- ])
-
- def convert(self, stream, opts, file_ext, log,
- accelerators):
- self._is_case_sensitive = None
- basedir = os.getcwd()
- self.opts = opts
-
- fname = None
- if hasattr(stream, 'name'):
- basedir = os.path.dirname(stream.name)
- fname = os.path.basename(stream.name)
-
- if file_ext != 'opf':
- if opts.dont_package:
- raise ValueError('The --dont-package option is not supported for an HTML input file')
- from calibre.ebooks.metadata.html import get_metadata
- mi = get_metadata(stream)
- if fname:
- from calibre.ebooks.metadata.meta import metadata_from_filename
- fmi = metadata_from_filename(fname)
- fmi.smart_update(mi)
- mi = fmi
- oeb = self.create_oebbook(stream.name, basedir, opts, log, mi)
- return oeb
-
- from calibre.ebooks.conversion.plumber import create_oebbook
- return create_oebbook(log, stream.name, opts,
- encoding=opts.input_encoding)
-
- def is_case_sensitive(self, path):
- if getattr(self, '_is_case_sensitive', None) is not None:
- return self._is_case_sensitive
- if not path or not os.path.exists(path):
- return islinux or isbsd
- self._is_case_sensitive = not (os.path.exists(path.lower()) \
- and os.path.exists(path.upper()))
- return self._is_case_sensitive
-
- def create_oebbook(self, htmlpath, basedir, opts, log, mi):
- from calibre.ebooks.conversion.plumber import create_oebbook
- from calibre.ebooks.oeb.base import (DirContainer,
- rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
- xpath)
- from calibre import guess_type
- from calibre.ebooks.oeb.transforms.metadata import \
- meta_info_to_oeb_metadata
- import cssutils, logging
- cssutils.log.setLevel(logging.WARN)
- self.OEB_STYLES = OEB_STYLES
- oeb = create_oebbook(log, None, opts, self,
- encoding=opts.input_encoding, populate=False)
- self.oeb = oeb
-
- metadata = oeb.metadata
- meta_info_to_oeb_metadata(mi, metadata, log)
- if not metadata.language:
- oeb.logger.warn(u'Language not specified')
- metadata.add('language', get_lang().replace('_', '-'))
- if not metadata.creator:
- oeb.logger.warn('Creator not specified')
- metadata.add('creator', self.oeb.translate(__('Unknown')))
- if not metadata.title:
- oeb.logger.warn('Title not specified')
- metadata.add('title', self.oeb.translate(__('Unknown')))
- bookid = str(uuid.uuid4())
- metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
- for ident in metadata.identifier:
- if 'id' in ident.attrib:
- self.oeb.uid = metadata.identifier[0]
- break
-
- filelist = get_filelist(htmlpath, basedir, opts, log)
- filelist = [f for f in filelist if not f.is_binary]
- htmlfile_map = {}
- for f in filelist:
- path = f.path
- oeb.container = DirContainer(os.path.dirname(path), log,
- ignore_opf=True)
- bname = os.path.basename(path)
- id, href = oeb.manifest.generate(id='html',
- href=ascii_filename(bname))
- htmlfile_map[path] = href
- item = oeb.manifest.add(id, href, 'text/html')
- item.html_input_href = bname
- oeb.spine.add(item, True)
-
- self.added_resources = {}
- self.log = log
- self.log('Normalizing filename cases')
- for path, href in htmlfile_map.items():
- if not self.is_case_sensitive(path):
- path = path.lower()
- self.added_resources[path] = href
- self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
- self.urldefrag = urldefrag
- self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
-
- self.log('Rewriting HTML links')
- for f in filelist:
- path = f.path
- dpath = os.path.dirname(path)
- oeb.container = DirContainer(dpath, log, ignore_opf=True)
- item = oeb.manifest.hrefs[htmlfile_map[path]]
- rewrite_links(item.data, partial(self.resource_adder, base=dpath))
-
- for item in oeb.manifest.values():
- if item.media_type in self.OEB_STYLES:
- dpath = None
- for path, href in self.added_resources.items():
- if href == item.href:
- dpath = os.path.dirname(path)
- break
- cssutils.replaceUrls(item.data,
- partial(self.resource_adder, base=dpath))
-
- toc = self.oeb.toc
- self.oeb.auto_generated_toc = True
- titles = []
- headers = []
- for item in self.oeb.spine:
- if not item.linear: continue
- html = item.data
- title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
- title = re.sub(r'\s+', ' ', title.strip())
- if title:
- titles.append(title)
- headers.append('(unlabled)')
- for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
- expr = '/h:html/h:body//h:%s[position()=1]/text()'
- header = ''.join(xpath(html, expr % tag))
- header = re.sub(r'\s+', ' ', header.strip())
- if header:
- headers[-1] = header
- break
- use = titles
- if len(titles) > len(set(titles)):
- use = headers
- for title, item in izip(use, self.oeb.spine):
- if not item.linear: continue
- toc.add(title, item.href)
-
- oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
- return oeb
-
- def link_to_local_path(self, link_, base=None):
- if not isinstance(link_, unicode):
- try:
- link_ = link_.decode('utf-8', 'error')
- except:
- self.log.warn('Failed to decode link %r. Ignoring'%link_)
- return None, None
- try:
- l = Link(link_, base if base else os.getcwdu())
- except:
- self.log.exception('Failed to process link: %r'%link_)
- return None, None
- if l.path is None:
- # Not a local resource
- return None, None
- link = l.path.replace('/', os.sep).strip()
- frag = l.fragment
- if not link:
- return None, None
- return link, frag
-
- def resource_adder(self, link_, base=None):
- link, frag = self.link_to_local_path(link_, base=base)
- if link is None:
- return link_
- try:
- if base and not os.path.isabs(link):
- link = os.path.join(base, link)
- link = os.path.abspath(link)
- except:
- return link_
- if not os.access(link, os.R_OK):
- return link_
- if os.path.isdir(link):
- self.log.warn(link_, 'is a link to a directory. Ignoring.')
- return link_
- if not self.is_case_sensitive(tempfile.gettempdir()):
- link = link.lower()
- if link not in self.added_resources:
- bhref = os.path.basename(link)
- id, href = self.oeb.manifest.generate(id='added',
- href=bhref)
- guessed = self.guess_type(href)[0]
- media_type = guessed or self.BINARY_MIME
- if media_type == 'text/plain':
- self.log.warn('Ignoring link to text file %r'%link_)
- return None
-
- self.oeb.log.debug('Added', link)
- self.oeb.container = self.DirContainer(os.path.dirname(link),
- self.oeb.log, ignore_opf=True)
- # Load into memory
- item = self.oeb.manifest.add(id, href, media_type)
- # bhref refers to an already existing file. The read() method of
- # DirContainer will call unquote on it before trying to read the
- # file, therefore we quote it here.
- if isinstance(bhref, unicode):
- bhref = bhref.encode('utf-8')
- item.html_input_href = quote(bhref).decode('utf-8')
- if guessed in self.OEB_STYLES:
- item.override_css_fetch = partial(
- self.css_import_handler, os.path.dirname(link))
- item.data
- self.added_resources[link] = href
-
- nlink = self.added_resources[link]
- if frag:
- nlink = '#'.join((nlink, frag))
- return nlink
-
- def css_import_handler(self, base, href):
- link, frag = self.link_to_local_path(href, base=base)
- if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
- return (None, None)
- try:
- raw = open(link, 'rb').read().decode('utf-8', 'replace')
- raw = self.oeb.css_preprocessor(raw, add_namespace=True)
- except:
- self.log.exception('Failed to read CSS file: %r'%link)
- return (None, None)
- return (None, raw)
diff --git a/src/calibre/ebooks/lrf/input.py b/src/calibre/ebooks/lrf/input.py
index 9777a8a998..e9bf42c6bd 100644
--- a/src/calibre/ebooks/lrf/input.py
+++ b/src/calibre/ebooks/lrf/input.py
@@ -6,12 +6,11 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import os, textwrap, sys, operator
+import textwrap, operator
from copy import deepcopy, copy
from lxml import etree
-from calibre.customize.conversion import InputFormatPlugin
from calibre import guess_type
class Canvas(etree.XSLTExtension):
@@ -406,76 +405,4 @@ class Styles(etree.XSLTExtension):
-class LRFInput(InputFormatPlugin):
- name = 'LRF Input'
- author = 'Kovid Goyal'
- description = 'Convert LRF files to HTML'
- file_types = set(['lrf'])
-
- def convert(self, stream, options, file_ext, log,
- accelerators):
- self.log = log
- self.log('Generating XML')
- from calibre.ebooks.lrf.lrfparser import LRFDocument
- d = LRFDocument(stream)
- d.parse()
- xml = d.to_xml(write_files=True)
- if options.verbose > 2:
- open('lrs.xml', 'wb').write(xml.encode('utf-8'))
- parser = etree.XMLParser(no_network=True, huge_tree=True)
- try:
- doc = etree.fromstring(xml, parser=parser)
- except:
- self.log.warn('Failed to parse XML. Trying to recover')
- parser = etree.XMLParser(no_network=True, huge_tree=True,
- recover=True)
- doc = etree.fromstring(xml, parser=parser)
-
-
- char_button_map = {}
- for x in doc.xpath('//CharButton[@refobj]'):
- ro = x.get('refobj')
- jump_button = doc.xpath('//*[@objid="%s"]'%ro)
- if jump_button:
- jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
- if jump_to:
- char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
- jump_to[0].get('refobj'))
- plot_map = {}
- for x in doc.xpath('//Plot[@refobj]'):
- ro = x.get('refobj')
- image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
- if image:
- imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
- image[0].get('refstream'))
- if imgstr:
- plot_map[ro] = imgstr[0].get('file')
-
- self.log('Converting XML to HTML...')
- styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
- media_type = MediaType()
- styles = Styles()
- text_block = TextBlock(styles, char_button_map, plot_map, log)
- canvas = Canvas(doc, styles, text_block, log)
- image_block = ImageBlock(canvas)
- ruled_line = RuledLine()
- extensions = {
- ('calibre', 'media-type') : media_type,
- ('calibre', 'text-block') : text_block,
- ('calibre', 'ruled-line') : ruled_line,
- ('calibre', 'styles') : styles,
- ('calibre', 'canvas') : canvas,
- ('calibre', 'image-block'): image_block,
- }
- transform = etree.XSLT(styledoc, extensions=extensions)
- try:
- result = transform(doc)
- except RuntimeError:
- sys.setrecursionlimit(5000)
- result = transform(doc)
-
- with open('content.opf', 'wb') as f:
- f.write(result)
- styles.write()
- return os.path.abspath('content.opf')
diff --git a/src/calibre/ebooks/odt/input.py b/src/calibre/ebooks/odt/input.py
index 214a40c29b..430d95b31f 100644
--- a/src/calibre/ebooks/odt/input.py
+++ b/src/calibre/ebooks/odt/input.py
@@ -12,7 +12,6 @@ from lxml import etree
from odf.odf2xhtml import ODF2XHTML
from calibre import CurrentDir, walk
-from calibre.customize.conversion import InputFormatPlugin
class Extract(ODF2XHTML):
@@ -178,16 +177,4 @@ class Extract(ODF2XHTML):
return os.path.abspath('metadata.opf')
-class ODTInput(InputFormatPlugin):
-
- name = 'ODT Input'
- author = 'Kovid Goyal'
- description = 'Convert ODT (OpenOffice) files to HTML'
- file_types = set(['odt'])
-
-
- def convert(self, stream, options, file_ext, log,
- accelerators):
- return Extract()(stream, '.', log)
-
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 5858824434..8e1a5ac775 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -2,42 +2,9 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal '
-import os, glob, re, textwrap
from lxml import etree
-from calibre.customize.conversion import InputFormatPlugin
-
-border_style_map = {
- 'single' : 'solid',
- 'double-thickness-border' : 'double',
- 'shadowed-border': 'outset',
- 'double-border': 'double',
- 'dotted-border': 'dotted',
- 'dashed': 'dashed',
- 'hairline': 'solid',
- 'inset': 'inset',
- 'dash-small': 'dashed',
- 'dot-dash': 'dotted',
- 'dot-dot-dash': 'dotted',
- 'outset': 'outset',
- 'tripple': 'double',
- 'triple': 'double',
- 'thick-thin-small': 'solid',
- 'thin-thick-small': 'solid',
- 'thin-thick-thin-small': 'solid',
- 'thick-thin-medium': 'solid',
- 'thin-thick-medium': 'solid',
- 'thin-thick-thin-medium': 'solid',
- 'thick-thin-large': 'solid',
- 'thin-thick-thin-large': 'solid',
- 'wavy': 'ridge',
- 'double-wavy': 'ridge',
- 'striped': 'ridge',
- 'emboss': 'inset',
- 'engrave': 'inset',
- 'frame': 'ridge',
-}
class InlineClass(etree.XSLTExtension):
@@ -71,261 +38,3 @@ class InlineClass(etree.XSLTExtension):
output_parent.text = ' '.join(classes)
-class RTFInput(InputFormatPlugin):
-
- name = 'RTF Input'
- author = 'Kovid Goyal'
- description = 'Convert RTF files to HTML'
- file_types = set(['rtf'])
-
- def generate_xml(self, stream):
- from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
- ofile = 'dataxml.xml'
- run_lev, debug_dir, indent_out = 1, None, 0
- if getattr(self.opts, 'debug_pipeline', None) is not None:
- try:
- os.mkdir('rtfdebug')
- debug_dir = 'rtfdebug'
- run_lev = 4
- indent_out = 1
- self.log('Running RTFParser in debug mode')
- except:
- self.log.warn('Impossible to run RTFParser in debug mode')
- parser = ParseRtf(
- in_file = stream,
- out_file = ofile,
- # Convert symbol fonts to unicode equivalents. Default
- # is 1
- convert_symbol = 1,
-
- # Convert Zapf fonts to unicode equivalents. Default
- # is 1.
- convert_zapf = 1,
-
- # Convert Wingding fonts to unicode equivalents.
- # Default is 1.
- convert_wingdings = 1,
-
- # Convert RTF caps to real caps.
- # Default is 1.
- convert_caps = 1,
-
- # Indent resulting XML.
- # Default is 0 (no indent).
- indent = indent_out,
-
- # Form lists from RTF. Default is 1.
- form_lists = 1,
-
- # Convert headings to sections. Default is 0.
- headings_to_sections = 1,
-
- # Group paragraphs with the same style name. Default is 1.
- group_styles = 1,
-
- # Group borders. Default is 1.
- group_borders = 1,
-
- # Write or do not write paragraphs. Default is 0.
- empty_paragraphs = 1,
-
- #debug
- deb_dir = debug_dir,
- run_level = run_lev,
- )
- parser.parse_rtf()
- with open(ofile, 'rb') as f:
- return f.read()
-
- def extract_images(self, picts):
- import imghdr
- self.log('Extracting images...')
-
- with open(picts, 'rb') as f:
- raw = f.read()
- picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
- hex = re.compile(r'[^a-fA-F0-9]')
- encs = [hex.sub('', pict) for pict in picts]
-
- count = 0
- imap = {}
- for enc in encs:
- if len(enc) % 2 == 1:
- enc = enc[:-1]
- data = enc.decode('hex')
- fmt = imghdr.what(None, data)
- if fmt is None:
- fmt = 'wmf'
- count += 1
- name = '%04d.%s' % (count, fmt)
- with open(name, 'wb') as f:
- f.write(data)
- imap[count] = name
- # with open(name+'.hex', 'wb') as f:
- # f.write(enc)
- return self.convert_images(imap)
-
- def convert_images(self, imap):
- self.default_img = None
- for count, val in imap.iteritems():
- try:
- imap[count] = self.convert_image(val)
- except:
- self.log.exception('Failed to convert', val)
- return imap
-
- def convert_image(self, name):
- if not name.endswith('.wmf'):
- return name
- try:
- return self.rasterize_wmf(name)
- except:
- self.log.exception('Failed to convert WMF image %r'%name)
- return self.replace_wmf(name)
-
- def replace_wmf(self, name):
- from calibre.ebooks import calibre_cover
- if self.default_img is None:
- self.default_img = calibre_cover('Conversion of WMF images is not supported',
- 'Use Microsoft Word or OpenOffice to save this RTF file'
- ' as HTML and convert that in calibre.', title_size=36,
- author_size=20)
- name = name.replace('.wmf', '.jpg')
- with open(name, 'wb') as f:
- f.write(self.default_img)
- return name
-
- def rasterize_wmf(self, name):
- from calibre.utils.wmf.parse import wmf_unwrap
- with open(name, 'rb') as f:
- data = f.read()
- data = wmf_unwrap(data)
- name = name.replace('.wmf', '.png')
- with open(name, 'wb') as f:
- f.write(data)
- return name
-
-
- def write_inline_css(self, ic, border_styles):
- font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
- enumerate(ic.font_sizes)]
- color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
- enumerate(ic.colors)]
- css = textwrap.dedent('''
- span.none {
- text-decoration: none; font-weight: normal;
- font-style: normal; font-variant: normal
- }
-
- span.italics { font-style: italic }
-
- span.bold { font-weight: bold }
-
- span.small-caps { font-variant: small-caps }
-
- span.underlined { text-decoration: underline }
-
- span.strike-through { text-decoration: line-through }
-
- ''')
- css += '\n'+'\n'.join(font_size_classes)
- css += '\n' +'\n'.join(color_classes)
-
- for cls, val in border_styles.iteritems():
- css += '\n\n.%s {\n%s\n}'%(cls, val)
-
- with open('styles.css', 'ab') as f:
- f.write(css)
-
- def convert_borders(self, doc):
- border_styles = []
- style_map = {}
- for elem in doc.xpath(r'//*[local-name()="cell"]'):
- style = ['border-style: hidden', 'border-width: 1px',
- 'border-color: black']
- for x in ('bottom', 'top', 'left', 'right'):
- bs = elem.get('border-cell-%s-style'%x, None)
- if bs:
- cbs = border_style_map.get(bs, 'solid')
- style.append('border-%s-style: %s'%(x, cbs))
- bw = elem.get('border-cell-%s-line-width'%x, None)
- if bw:
- style.append('border-%s-width: %spt'%(x, bw))
- bc = elem.get('border-cell-%s-color'%x, None)
- if bc:
- style.append('border-%s-color: %s'%(x, bc))
- style = ';\n'.join(style)
- if style not in border_styles:
- border_styles.append(style)
- idx = border_styles.index(style)
- cls = 'border_style%d'%idx
- style_map[cls] = style
- elem.set('class', cls)
- return style_map
-
- def convert(self, stream, options, file_ext, log,
- accelerators):
- from calibre.ebooks.metadata.meta import get_metadata
- from calibre.ebooks.metadata.opf2 import OPFCreator
- from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
- self.opts = options
- self.log = log
- self.log('Converting RTF to XML...')
- try:
- xml = self.generate_xml(stream.name)
- except RtfInvalidCodeException as e:
- raise ValueError(_('This RTF file has a feature calibre does not '
- 'support. Convert it to HTML first and then try it.\n%s')%e)
-
- d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
- if d:
- imap = {}
- try:
- imap = self.extract_images(d[0])
- except:
- self.log.exception('Failed to extract images...')
-
- self.log('Parsing XML...')
- parser = etree.XMLParser(recover=True, no_network=True)
- doc = etree.fromstring(xml, parser=parser)
- border_styles = self.convert_borders(doc)
- for pict in doc.xpath('//rtf:pict[@num]',
- namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
- num = int(pict.get('num'))
- name = imap.get(num, None)
- if name is not None:
- pict.set('num', name)
-
- self.log('Converting XML to HTML...')
- inline_class = InlineClass(self.log)
- styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
- extensions = { ('calibre', 'inline-class') : inline_class }
- transform = etree.XSLT(styledoc, extensions=extensions)
- result = transform(doc)
- html = 'index.xhtml'
- with open(html, 'wb') as f:
- res = transform.tostring(result)
- # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
- #clean multiple \n
- res = re.sub('\n+', '\n', res)
- # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
- # res = re.sub('\s*', '', res)
- # res = re.sub('(?<=\n)\n{2}',
- # u'\u00a0
\n'.encode('utf-8'), res)
- f.write(res)
- self.write_inline_css(inline_class, border_styles)
- stream.seek(0)
- mi = get_metadata(stream, 'rtf')
- if not mi.title:
- mi.title = _('Unknown')
- if not mi.authors:
- mi.authors = [_('Unknown')]
- opf = OPFCreator(os.getcwd(), mi)
- opf.create_manifest([('index.xhtml', None)])
- opf.create_spine(['index.xhtml'])
- opf.render(open('metadata.opf', 'wb'))
- return os.path.abspath('metadata.opf')
-
-#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
-# os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug")
-# debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug"