From 54dd263be1ab436bf55027b469f77e40cf155997 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Apr 2009 09:23:44 -0700 Subject: [PATCH 1/9] IGN:Fix bug in anchor insertion routine where it was inserting an anchor in the wrong place --- src/calibre/ebooks/mobi/reader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index d217f5be6a..1f84c4f5d4 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -587,7 +587,8 @@ class MobiReader(object): if r > -1 and (r < l or l == end or l == -1): p = self.mobi_html.rfind('<', 0, end + 1) if pos < end and p > -1 and \ - not end_tag_re.match(self.mobi_html[p:r]): + not end_tag_re.match(self.mobi_html[p:r]) and \ + not self.mobi_html[p:r+1].endswith('/>'): anchor = ' filepos-id="filepos%d"' end = r else: From 8e68f9d4dd1e630b0ae8391f10c25c6e6d9a1743 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Apr 2009 15:25:25 -0700 Subject: [PATCH 2/9] Fix #2350 (conversion of a mobipocket file (huffdic compressed) fail) --- src/calibre/ebooks/mobi/reader.py | 1 + src/calibre/utils/terminfo.py | 66 ++++++++++++++++--------------- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 1f84c4f5d4..95a05e69c0 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -382,6 +382,7 @@ class MobiReader(object): } mobi_version = self.book_header.mobi_version for tag in root.iter(etree.Element): + tag.attrib.pop('xmlns', '') if tag.tag in ('country-region', 'place', 'placetype', 'placename', 'state', 'city', 'street', 'address', 'content'): tag.tag = 'div' if tag.tag == 'content' else 'span' diff --git a/src/calibre/utils/terminfo.py b/src/calibre/utils/terminfo.py index 075c0e694d..1cbd4d830a 100644 --- a/src/calibre/utils/terminfo.py +++ b/src/calibre/utils/terminfo.py @@ -7,22 +7,22 @@ import sys, re, os class TerminalController: """ A class that can be used to portably generate formatted output to - a terminal. - + a terminal. + `TerminalController` defines a set of instance variables whose values are initialized to the control sequence necessary to perform a given action. These can be simply included in normal output to the terminal: - + >>> term = TerminalController() >>> print 'This is '+term.GREEN+'green'+term.NORMAL - + Alternatively, the `render()` method can used, which replaces '${action}' with the string required to perform 'action': - + >>> term = TerminalController() >>> print term.render('This is ${GREEN}green${NORMAL}') - + If the terminal doesn't support a given action, then the value of the corresponding instance variable will be set to ''. As a result, the above code will still work on terminals that do not @@ -30,11 +30,11 @@ class TerminalController: Also, this means that you can test whether the terminal supports a given action by simply testing the truth value of the corresponding instance variable: - + >>> term = TerminalController() >>> if term.CLEAR_SCREEN: ... print 'This terminal supports clearning the screen.' - + Finally, if the width and height of the terminal are known, then they will be stored in the `COLS` and `LINES` attributes. """ @@ -44,35 +44,35 @@ class TerminalController: DOWN = '' #: Move the cursor down one line LEFT = '' #: Move the cursor left one char RIGHT = '' #: Move the cursor right one char - + # Deletion: CLEAR_SCREEN = '' #: Clear the screen and move to home position CLEAR_EOL = '' #: Clear to the end of the line. CLEAR_BOL = '' #: Clear to the beginning of the line. CLEAR_EOS = '' #: Clear to the end of the screen - + # Output modes: BOLD = '' #: Turn on bold mode BLINK = '' #: Turn on blink mode DIM = '' #: Turn on half-bright mode REVERSE = '' #: Turn on reverse-video mode NORMAL = '' #: Turn off all modes - + # Cursor display: HIDE_CURSOR = '' #: Make the cursor invisible SHOW_CURSOR = '' #: Make the cursor visible - + # Terminal size: COLS = None #: Width of the terminal (None for unknown) LINES = None #: Height of the terminal (None for unknown) - + # Foreground colors: BLACK = BLUE = GREEN = CYAN = RED = MAGENTA = YELLOW = WHITE = '' - + # Background colors: BG_BLACK = BG_BLUE = BG_GREEN = BG_CYAN = '' BG_RED = BG_MAGENTA = BG_YELLOW = BG_WHITE = '' - + _STRING_CAPABILITIES = """ BOL=cr UP=cuu1 DOWN=cud1 LEFT=cub1 RIGHT=cuf1 CLEAR_SCREEN=clear CLEAR_EOL=el CLEAR_BOL=el1 CLEAR_EOS=ed BOLD=bold @@ -80,7 +80,7 @@ class TerminalController: HIDE_CURSOR=cinvis SHOW_CURSOR=cnorm""".split() _COLORS = """BLACK BLUE GREEN CYAN RED MAGENTA YELLOW WHITE""".split() _ANSICOLORS = "BLACK RED GREEN YELLOW BLUE MAGENTA CYAN WHITE".split() - + def __init__(self, term_stream=sys.stdout): """ Create a `TerminalController` and initialize its attributes @@ -92,24 +92,24 @@ class TerminalController: # Curses isn't available on all platforms try: import curses except: return - + # If the stream isn't a tty, then assume it has no capabilities. if os.environ.get('CALIBRE_WORKER', None) is not None or not hasattr(term_stream, 'isatty') or not term_stream.isatty(): return - + # Check the terminal type. If we fail, then assume that the # terminal has no capabilities. try: curses.setupterm() except: return - + # Look up numeric capabilities. self.COLS = curses.tigetnum('cols') self.LINES = curses.tigetnum('lines') - + # Look up string capabilities. for capability in self._STRING_CAPABILITIES: (attrib, cap_name) = capability.split('=') setattr(self, attrib, self._tigetstr(cap_name) or '') - + # Colors set_fg = self._tigetstr('setf') if set_fg: @@ -127,7 +127,7 @@ class TerminalController: if set_bg_ansi: for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS): setattr(self, 'BG_'+color, curses.tparm(set_bg_ansi, i) or '') - + def _tigetstr(self, cap_name): # String capabilities can include "delays" of the form "$<2>". # For any modern terminal, we should be able to just ignore @@ -135,7 +135,7 @@ class TerminalController: import curses cap = curses.tigetstr(cap_name) or '' return re.sub(r'\$<\d+>[/*]?', '', cap) - + def render(self, template): """ Replace each $-substitutions in the given template string with @@ -143,7 +143,7 @@ class TerminalController: '' (if it's not). """ return re.sub(r'\$\$|\${\w+}', self._render_sub, template) - + def _render_sub(self, match): s = match.group() if s == '$$': return s @@ -156,20 +156,20 @@ class TerminalController: class ProgressBar: """ A 3-line progress bar, which looks like:: - + Header 20% [===========----------------------------------] progress message - + The progress bar is colored, if the terminal supports color output; and adjusts to the width of the terminal. - + If the terminal doesn't have the required capabilities, it uses a simple progress bar. """ BAR = '%3d%% ${GREEN}[${BOLD}%s%s${NORMAL}${GREEN}]${NORMAL}\n' HEADER = '${BOLD}${CYAN}%s${NORMAL}\n\n' - + def __init__(self, term, header, no_progress_bar = False): self.term, self.no_progress_bar = term, no_progress_bar self.fancy = self.term.CLEAR_EOL and self.term.UP and self.term.BOL @@ -177,12 +177,14 @@ class ProgressBar: self.width = self.term.COLS or 75 self.bar = term.render(self.BAR) self.header = self.term.render(self.HEADER % header.center(self.width)) + if isinstance(self.header, unicode): + self.header = self.header.encode('utf-8') self.cleared = 1 #: true if we haven't drawn the bar yet. - + def update(self, percent, message=''): if isinstance(message, unicode): message = message.encode('utf-8', 'replace') - + if self.no_progress_bar: if message: print message @@ -203,8 +205,8 @@ class ProgressBar: else: print '%d%%'%(percent*100), message sys.stdout.flush() - - + + def clear(self): if self.fancy and not self.cleared: sys.stdout.write(self.term.BOL + self.term.CLEAR_EOL + From 021149097fdea34eba3c52045bf64c32b5508146 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Apr 2009 15:35:24 -0700 Subject: [PATCH 3/9] Fix #2311 (Failure to parse an lrf file) --- src/calibre/ebooks/lrf/lrfparser.py | 32 ++++++++++++++--------------- src/calibre/ebooks/lrf/objects.py | 7 ++++++- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/calibre/ebooks/lrf/lrfparser.py b/src/calibre/ebooks/lrf/lrfparser.py index d74841515c..566f0b38f1 100644 --- a/src/calibre/ebooks/lrf/lrfparser.py +++ b/src/calibre/ebooks/lrf/lrfparser.py @@ -5,16 +5,16 @@ __copyright__ = '2008, Kovid Goyal ' import sys, array, os, re, codecs, logging from calibre import setup_cli_handlers, sanitize_file_name -from calibre.utils.config import OptionParser +from calibre.utils.config import OptionParser from calibre.ebooks.lrf.meta import LRFMetaFile from calibre.ebooks.lrf.objects import get_object, PageTree, StyleObject, \ Font, Text, TOCObject, BookAttr, ruby_tags - + class LRFDocument(LRFMetaFile): - + class temp(object): pass - + def __init__(self, stream): LRFMetaFile.__init__(self, stream) self.scramble_key = self.xor_key @@ -23,11 +23,11 @@ class LRFDocument(LRFMetaFile): self.image_map = {} self.toc = '' self.keep_parsing = True - + def parse(self): self._parse_objects() self.metadata = LRFDocument.temp() - for a in ('title', 'title_reading', 'author', 'author_reading', 'book_id', + for a in ('title', 'title_reading', 'author', 'author_reading', 'book_id', 'classification', 'free_text', 'publisher', 'label', 'category'): setattr(self.metadata, a, getattr(self, a)) self.doc_info = LRFDocument.temp() @@ -37,7 +37,7 @@ class LRFDocument(LRFMetaFile): self.device_info = LRFDocument.temp() for a in ('dpi', 'width', 'height'): setattr(self.device_info, a, getattr(self, a)) - + def _parse_objects(self): self.objects = {} self._file.seek(self.object_index_offset) @@ -68,15 +68,15 @@ class LRFDocument(LRFMetaFile): attr = h[0] if hasattr(obj, attr): self.ruby_tags[attr] = getattr(obj, attr) - + def __iter__(self): for pt in self.page_trees: yield pt - + def write_files(self): for obj in self.image_map.values() + self.font_map.values(): - open(obj.file, 'wb').write(obj.stream) - + open(obj.file, 'wb').write(obj.stream) + def to_xml(self, write_files=True): bookinfo = u'\n\n\n' bookinfo += u'%s\n'%(self.metadata.title_reading, self.metadata.title) @@ -113,7 +113,7 @@ class LRFDocument(LRFMetaFile): pages += unicode(page) pages += close traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id] - + objects = u'\n\n' styles = u'\n + + +
+ comic page #%d +
+ + + ''') + dir = os.path.dirname(pages[0]) + for i, page in enumerate(pages): + wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1) + page = os.path.join(dir, 'page_%d.xhtml'%(i+1)) + open(page, 'wb').write(wrapper) + wrappers.append(page) + return wrappers + diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index e12686a36c..941a1ec5fc 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -47,7 +47,7 @@ def print_help(parser, log): def check_command_line_options(parser, args, log): if len(args) < 3 or args[1].startswith('-') or args[2].startswith('-'): - print_help(parser) + print_help(parser, log) log.error('\n\nYou must specify the input AND output files') raise SystemExit(1) diff --git a/src/calibre/ebooks/lrf/comic/convert_from.py b/src/calibre/ebooks/lrf/comic/convert_from.py deleted file mode 100755 index 50f5e1e72e..0000000000 --- a/src/calibre/ebooks/lrf/comic/convert_from.py +++ /dev/null @@ -1,562 +0,0 @@ -from __future__ import with_statement -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' -__docformat__ = 'restructuredtext en' - -''' -Based on ideas from comiclrf created by FangornUK. -''' - -import os, sys, shutil, traceback, textwrap, fnmatch -from uuid import uuid4 - - - - -from calibre import extract, terminal_controller, __appname__, __version__ -from calibre.utils.config import Config, StringConfig -from calibre.ptempfile import PersistentTemporaryDirectory -from calibre.parallel import Server, ParallelJob -from calibre.utils.terminfo import ProgressBar -from calibre.ebooks.lrf.pylrs.pylrs import Book, BookSetting, ImageStream, ImageBlock -from calibre.ebooks.metadata import MetaInformation -from calibre.ebooks.metadata.opf import OPFCreator -from calibre.ebooks.epub.from_html import config as html2epub_config, convert as html2epub -from calibre.customize.ui import run_plugins_on_preprocess -try: - from calibre.utils.PythonMagickWand import \ - NewMagickWand, NewPixelWand, \ - MagickSetImageBorderColor, \ - MagickReadImage, MagickRotateImage, \ - MagickTrimImage, PixelSetColor,\ - MagickNormalizeImage, MagickGetImageWidth, \ - MagickGetImageHeight, \ - MagickResizeImage, MagickSetImageType, \ - GrayscaleType, CatromFilter, MagickSetImagePage, \ - MagickBorderImage, MagickSharpenImage, MagickDespeckleImage, \ - MagickQuantizeImage, RGBColorspace, \ - MagickWriteImage, DestroyPixelWand, \ - DestroyMagickWand, CloneMagickWand, \ - MagickThumbnailImage, MagickCropImage, ImageMagick - _imagemagick_loaded = True -except: - _imagemagick_loaded = False - -PROFILES = { - # Name : (width, height) in pixels - 'prs500':(584, 754), - # The SONY's LRF renderer (on the PRS500) only uses the first 800x600 block of the image - 'prs500-landscape': (784, 1012) - } - -def extract_comic(path_to_comic_file): - ''' - Un-archive the comic file. - ''' - tdir = PersistentTemporaryDirectory(suffix='_comic_extract') - extract(path_to_comic_file, tdir) - return tdir - -def find_pages(dir, sort_on_mtime=False, verbose=False): - ''' - Find valid comic pages in a previously un-archived comic. - - :param dir: Directory in which extracted comic lives - :param sort_on_mtime: If True sort pages based on their last modified time. - Otherwise, sort alphabetically. - ''' - extensions = ['jpeg', 'jpg', 'gif', 'png'] - pages = [] - for datum in os.walk(dir): - for name in datum[-1]: - path = os.path.join(datum[0], name) - for ext in extensions: - if path.lower().endswith('.'+ext): - pages.append(path) - break - if sort_on_mtime: - comparator = lambda x, y : cmp(os.stat(x).st_mtime, os.stat(y).st_mtime) - else: - comparator = lambda x, y : cmp(os.path.basename(x), os.path.basename(y)) - - pages.sort(cmp=comparator) - if verbose: - print 'Found comic pages...' - print '\t'+'\n\t'.join([os.path.basename(p) for p in pages]) - return pages - -class PageProcessor(list): - ''' - Contains the actual image rendering logic. See :method:`render` and - :method:`process_pages`. - ''' - - def __init__(self, path_to_page, dest, opts, num): - list.__init__(self) - self.path_to_page = path_to_page - self.opts = opts - self.num = num - self.dest = dest - self.rotate = False - self.render() - - - def render(self): - img = NewMagickWand() - if img < 0: - raise RuntimeError('Cannot create wand.') - if not MagickReadImage(img, self.path_to_page): - raise IOError('Failed to read image from: %'%self.path_to_page) - width = MagickGetImageWidth(img) - height = MagickGetImageHeight(img) - if self.num == 0: # First image so create a thumbnail from it - thumb = CloneMagickWand(img) - if thumb < 0: - raise RuntimeError('Cannot create wand.') - MagickThumbnailImage(thumb, 60, 80) - MagickWriteImage(thumb, os.path.join(self.dest, 'thumbnail.png')) - DestroyMagickWand(thumb) - self.pages = [img] - if width > height: - if self.opts.landscape: - self.rotate = True - else: - split1, split2 = map(CloneMagickWand, (img, img)) - DestroyMagickWand(img) - if split1 < 0 or split2 < 0: - raise RuntimeError('Cannot create wand.') - MagickCropImage(split1, (width/2)-1, height, 0, 0) - MagickCropImage(split2, (width/2)-1, height, width/2, 0 ) - self.pages = [split2, split1] if self.opts.right2left else [split1, split2] - self.process_pages() - - def process_pages(self): - for i, wand in enumerate(self.pages): - pw = NewPixelWand() - try: - if pw < 0: - raise RuntimeError('Cannot create wand.') - PixelSetColor(pw, 'white') - - MagickSetImageBorderColor(wand, pw) - if self.rotate: - MagickRotateImage(wand, pw, -90) - - # 25 percent fuzzy trim? - if not self.opts.disable_trim: - MagickTrimImage(wand, 25*65535/100) - MagickSetImagePage(wand, 0,0,0,0) #Clear page after trim, like a "+repage" - # Do the Photoshop "Auto Levels" equivalent - if not self.opts.dont_normalize: - MagickNormalizeImage(wand) - sizex = MagickGetImageWidth(wand) - sizey = MagickGetImageHeight(wand) - - SCRWIDTH, SCRHEIGHT = PROFILES[self.opts.profile] - - if self.opts.keep_aspect_ratio: - # Preserve the aspect ratio by adding border - aspect = float(sizex) / float(sizey) - if aspect <= (float(SCRWIDTH) / float(SCRHEIGHT)): - newsizey = SCRHEIGHT - newsizex = int(newsizey * aspect) - deltax = (SCRWIDTH - newsizex) / 2 - deltay = 0 - else: - newsizex = SCRWIDTH - newsizey = int(newsizex / aspect) - deltax = 0 - deltay = (SCRHEIGHT - newsizey) / 2 - MagickResizeImage(wand, newsizex, newsizey, CatromFilter, 1.0) - MagickSetImageBorderColor(wand, pw) - MagickBorderImage(wand, pw, deltax, deltay) - elif self.opts.wide: - # Keep aspect and Use device height as scaled image width so landscape mode is clean - aspect = float(sizex) / float(sizey) - screen_aspect = float(SCRWIDTH) / float(SCRHEIGHT) - # Get dimensions of the landscape mode screen - # Add 25px back to height for the battery bar. - wscreenx = SCRHEIGHT + 25 - wscreeny = int(wscreenx / screen_aspect) - if aspect <= screen_aspect: - newsizey = wscreeny - newsizex = int(newsizey * aspect) - deltax = (wscreenx - newsizex) / 2 - deltay = 0 - else: - newsizex = wscreenx - newsizey = int(newsizex / aspect) - deltax = 0 - deltay = (wscreeny - newsizey) / 2 - MagickResizeImage(wand, newsizex, newsizey, CatromFilter, 1.0) - MagickSetImageBorderColor(wand, pw) - MagickBorderImage(wand, pw, deltax, deltay) - else: - MagickResizeImage(wand, SCRWIDTH, SCRHEIGHT, CatromFilter, 1.0) - - if not self.opts.dont_sharpen: - MagickSharpenImage(wand, 0.0, 1.0) - - MagickSetImageType(wand, GrayscaleType) - - if self.opts.despeckle: - MagickDespeckleImage(wand) - - MagickQuantizeImage(wand, self.opts.colors, RGBColorspace, 0, 1, 0) - dest = '%d_%d.png'%(self.num, i) - dest = os.path.join(self.dest, dest) - MagickWriteImage(wand, dest+'8') - os.rename(dest+'8', dest) - self.append(dest) - finally: - if pw > 0: - DestroyPixelWand(pw) - DestroyMagickWand(wand) - -def render_pages(tasks, dest, opts, notification=None): - ''' - Entry point for the job server. - ''' - failures, pages = [], [] - with ImageMagick(): - for num, path in tasks: - try: - pages.extend(PageProcessor(path, dest, opts, num)) - msg = _('Rendered %s') - except: - failures.append(path) - msg = _('Failed %s') - if opts.verbose: - msg += '\n' + traceback.format_exc() - msg = msg%path - if notification is not None: - notification(0.5, msg) - - return pages, failures - - -class JobManager(object): - ''' - Simple job manager responsible for keeping track of overall progress. - ''' - - def __init__(self, total, update): - self.total = total - self.update = update - self.done = 0 - self.add_job = lambda j: j - self.output = lambda j: j - self.start_work = lambda j: j - self.job_done = lambda j: j - - def status_update(self, job): - self.done += 1 - #msg = msg%os.path.basename(job.args[0]) - self.update(float(self.done)/self.total, job.msg) - -def process_pages(pages, opts, update): - ''' - Render all identified comic pages. - ''' - if not _imagemagick_loaded: - raise RuntimeError('Failed to load ImageMagick') - - tdir = PersistentTemporaryDirectory('_comic2lrf_pp') - job_manager = JobManager(len(pages), update) - server = Server() - jobs = [] - tasks = server.split(pages) - for task in tasks: - jobs.append(ParallelJob('render_pages', lambda s:s, job_manager=job_manager, - args=[task, tdir, opts])) - server.add_job(jobs[-1]) - server.wait() - server.killall() - server.close() - ans, failures = [], [] - - for job in jobs: - if job.result is None: - raise Exception(_('Failed to process comic: %s\n\n%s')%(job.exception, job.traceback)) - pages, failures_ = job.result - ans += pages - failures += failures_ - return ans, failures, tdir - -def config(defaults=None,output_format='lrf'): - desc = _('Options to control the conversion of comics (CBR, CBZ) files into ebooks') - if defaults is None: - c = Config('comic', desc) - else: - c = StringConfig(defaults, desc) - c.add_opt('title', ['-t', '--title'], - help=_('Title for generated ebook. Default is to use the filename.')) - c.add_opt('author', ['-a', '--author'], - help=_('Set the author in the metadata of the generated ebook. Default is %default'), - default=_('Unknown')) - c.add_opt('output', ['-o', '--output'], - help=_('Path to output file. By default a file is created in the current directory.')) - c.add_opt('colors', ['-c', '--colors'], type='int', default=64, - help=_('Number of colors for grayscale image conversion. Default: %default')) - c.add_opt('dont_normalize', ['-n', '--disable-normalize'], default=False, - help=_('Disable normalize (improve contrast) color range for pictures. Default: False')) - c.add_opt('keep_aspect_ratio', ['-r', '--keep-aspect-ratio'], default=False, - help=_('Maintain picture aspect ratio. Default is to fill the screen.')) - c.add_opt('dont_sharpen', ['-s', '--disable-sharpen'], default=False, - help=_('Disable sharpening.')) - c.add_opt('disable_trim', ['--disable-trim'], default=False, - help=_('Disable trimming of comic pages. For some comics, ' - 'trimming might remove content as well as borders.')) - c.add_opt('landscape', ['-l', '--landscape'], default=False, - help=_("Don't split landscape images into two portrait images")) - c.add_opt('wide', ['-w', '--wide-aspect'], default=False, - help=_("Keep aspect ratio and scale image using screen height as image width for viewing in landscape mode.")) - c.add_opt('right2left', ['--right2left'], default=False, action='store_true', - help=_('Used for right-to-left publications like manga. Causes landscape pages to be split into portrait pages from right to left.')) - c.add_opt('despeckle', ['-d', '--despeckle'], default=False, - help=_('Enable Despeckle. Reduces speckle noise. May greatly increase processing time.')) - c.add_opt('no_sort', ['--no-sort'], default=False, - help=_("Don't sort the files found in the comic alphabetically by name. Instead use the order they were added to the comic.")) - c.add_opt('profile', ['-p', '--profile'], default='prs500', choices=PROFILES.keys(), - help=_('Choose a profile for the device you are generating this file for. The default is the SONY PRS-500 with a screen size of 584x754 pixels. This is suitable for any reader with the same screen size. Choices are %s')%PROFILES.keys()) - c.add_opt('verbose', ['-v', '--verbose'], default=0, action='count', - help=_('Be verbose, useful for debugging. Can be specified multiple times for greater verbosity.')) - c.add_opt('no_progress_bar', ['--no-progress-bar'], default=False, - help=_("Don't show progress bar.")) - if output_format == 'pdf': - c.add_opt('no_process',['--no_process'], default=False, - help=_("Apply no processing to the image")) - return c - -def option_parser(output_format='lrf'): - c = config(output_format=output_format) - return c.option_parser(usage=_('''\ -%prog [options] comic.cb[z|r] - -Convert a comic in a CBZ or CBR file to an ebook. -''')) - -def create_epub(pages, profile, opts, thumbnail=None): - wrappers = [] - WRAPPER = textwrap.dedent('''\ - - - Page #%d - - - -
- comic page #%d -
- - - ''') - dir = os.path.dirname(pages[0]) - for i, page in enumerate(pages): - wrapper = WRAPPER%(i+1, os.path.basename(page), i+1) - page = os.path.join(dir, 'page_%d.html'%(i+1)) - open(page, 'wb').write(wrapper) - wrappers.append(page) - - mi = MetaInformation(opts.title, [opts.author]) - opf = OPFCreator(dir, mi) - opf.create_manifest([(w, None) for w in wrappers]) - opf.create_spine(wrappers) - metadata = os.path.join(dir, 'metadata.opf') - opf.render(open(metadata, 'wb')) - opts2 = html2epub_config('margin_left=0\nmargin_right=0\nmargin_top=0\nmargin_bottom=0').parse() - opts2.output = opts.output - html2epub(metadata, opts2) - -def create_lrf(pages, profile, opts, thumbnail=None): - width, height = PROFILES[profile] - ps = {} - ps['topmargin'] = 0 - ps['evensidemargin'] = 0 - ps['oddsidemargin'] = 0 - ps['textwidth'] = width - ps['textheight'] = height - book = Book(title=opts.title, author=opts.author, - bookid=uuid4().hex, - publisher='%s %s'%(__appname__, __version__), thumbnail=thumbnail, - category='Comic', pagestyledefault=ps, - booksetting=BookSetting(screenwidth=width, screenheight=height)) - for page in pages: - imageStream = ImageStream(page) - _page = book.create_page() - _page.append(ImageBlock(refstream=imageStream, - blockwidth=width, blockheight=height, xsize=width, - ysize=height, x1=width, y1=height)) - book.append(_page) - - book.renderLrf(open(opts.output, 'wb')) - print _('Output written to'), opts.output - - -def create_pdf(pages, profile, opts, thumbnail=None,toc=None): - width, height = PROFILES[profile] - - from reportlab.pdfgen import canvas - - cur_page=0 - heading = [] - if toc != None: - if len(toc) == 1: - toc = None - else: - toc_index = 0 - base_cur = 0 - rem = 0 - breaker = False - while True: - letter=toc[0][0][base_cur] - for i in range(len(toc)): - if letter != toc[i][0][base_cur]: - breaker = True - if breaker: - break - if letter == os.sep: - rem=base_cur - base_cur += 1 - toc.append(("Not seen",-1)) - - - pdf = canvas.Canvas(filename=opts.output, pagesize=(width,height+15)) - pdf.setAuthor(opts.author) - pdf.setTitle(opts.title) - - - for page in pages: - if opts.keep_aspect_ratio: - img = NewMagickWand() - if img < 0: - raise RuntimeError('Cannot create wand.') - if not MagickReadImage(img, page): - raise IOError('Failed to read image from: %'%page) - sizex = MagickGetImageWidth(img) - sizey = MagickGetImageHeight(img) - if opts.keep_aspect_ratio: - # Preserve the aspect ratio by adding border - aspect = float(sizex) / float(sizey) - if aspect <= (float(width) / float(height)): - newsizey = height - newsizex = int(newsizey * aspect) - deltax = (width - newsizex) / 2 - deltay = 0 - else: - newsizex = width - newsizey = int(newsizex / aspect) - deltax = 0 - deltay = (height - newsizey) / 2 - pdf.drawImage(page, x=deltax,y=deltay,width=newsizex, height=newsizey) - else: - pdf.drawImage(page, x=0,y=0,width=width, height=height) - if toc != None: - if toc[toc_index][1] == cur_page: - tmp=toc[toc_index][0] - toc_current=tmp[rem:len(tmp)-4] - index=0 - while True: - key = 'page%d-%d' % (cur_page, index) - pdf.bookmarkPage(key) - (head,dummy,list)=toc_current.partition(os.sep) - try: - if heading[index] != head: - heading[index] = head - pdf.addOutlineEntry(title=head,key=key,level=index) - except: - heading.append(head) - pdf.addOutlineEntry(title=head,key=key,level=index) - index += 1 - toc_current=list - if dummy == "": - break - toc_index += 1 - cur_page += 1 - pdf.showPage() - # Write the document to disk - pdf.save() - - -def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='lrf'): - path_to_file = run_plugins_on_preprocess(path_to_file) - source = path_to_file - to_delete = [] - toc = [] - list = [] - pages = [] - - - if not opts.title: - opts.title = os.path.splitext(os.path.basename(source))[0] - if not opts.output: - opts.output = os.path.abspath(os.path.splitext(os.path.basename(source))[0]+'.'+output_format) - if os.path.isdir(source): - for path in all_files( source , '*.cbr|*.cbz' ): - list.append( path ) - else: - list= [ os.path.abspath(source) ] - - for source in list: - tdir = extract_comic(source) - new_pages = find_pages(tdir, sort_on_mtime=opts.no_sort, verbose=opts.verbose) - thumbnail = None - if not new_pages: - raise ValueError('Could not find any pages in the comic: %s'%source) - if not getattr(opts, 'no_process', False): - new_pages, failures, tdir2 = process_pages(new_pages, opts, notification) - if not new_pages: - raise ValueError('Could not find any valid pages in the comic: %s'%source) - if failures: - print 'Could not process the following pages (run with --verbose to see why):' - for f in failures: - print '\t', f - thumbnail = os.path.join(tdir2, 'thumbnail.png') - if not os.access(thumbnail, os.R_OK): - thumbnail = None - toc.append((source,len(pages))) - pages.extend(new_pages) - to_delete.append(tdir) - - - if output_format == 'lrf': - create_lrf(pages, opts.profile, opts, thumbnail=thumbnail) - if output_format == 'epub': - create_epub(pages, opts.profile, opts, thumbnail=thumbnail) - if output_format == 'pdf': - create_pdf(pages, opts.profile, opts, thumbnail=thumbnail,toc=toc) - for tdir in to_delete: - shutil.rmtree(tdir) - - -def all_files(root, patterns='*'): - # Expand patterns from semicolon-separated string to list - patterns = patterns.split('|') - for path, subdirs, files in os.walk(root): - files.sort( ) - for name in files: - for pattern in patterns: - if fnmatch.fnmatch(name, pattern): - yield os.path.join(path, name) - break - - -def main(args=sys.argv, notification=None, output_format='lrf'): - parser = option_parser(output_format=output_format) - opts, args = parser.parse_args(args) - if len(args) < 2: - parser.print_help() - print '\nYou must specify a file to convert' - return 1 - - if not callable(notification): - pb = ProgressBar(terminal_controller, _('Rendering comic pages...'), - no_progress_bar=opts.no_progress_bar or getattr(opts, 'no_process', False)) - notification = pb.update - - source = os.path.abspath(args[1]) - do_convert(source, opts, notification, output_format=output_format) - return 0 - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py index ffafa6d1a2..ea965c3410 100644 --- a/src/calibre/ebooks/oeb/iterator.py +++ b/src/calibre/ebooks/oeb/iterator.py @@ -128,6 +128,8 @@ class EbookIterator(object): plumber.setup_options() if hasattr(plumber.opts, 'dont_package'): plumber.opts.dont_package = True + if hasattr(plumber.opts, 'no_process'): + plumber.opts.no_process = True self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'), plumber.opts, plumber.input_fmt, self.log, {}, self.base) diff --git a/src/calibre/libunzip.py b/src/calibre/libunzip.py index 55d71014a0..f384af1073 100644 --- a/src/calibre/libunzip.py +++ b/src/calibre/libunzip.py @@ -3,19 +3,19 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' -import os, re +import re from calibre.utils import zipfile def update(pathtozip, patterns, filepaths, names, compression=zipfile.ZIP_DEFLATED, verbose=True): ''' - Update files in the zip file at `pathtozip` matching the given + Update files in the zip file at `pathtozip` matching the given `patterns` with the given `filepaths`. If more than - one file matches, all of the files are replaced. - + one file matches, all of the files are replaced. + :param patterns: A list of compiled regular expressions :param filepaths: A list of paths to the replacement files. Must have the same length as `patterns`. - :param names: A list of archive names for each file in filepaths. + :param names: A list of archive names for each file in filepaths. A name can be `None` in which case the name of the existing file in the archive is used. :param compression: The compression to use when replacing files. Can be @@ -48,4 +48,4 @@ def extract_member(filename, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I) names = zf.namelist() for name in names: if match.search(name): - return name, zf.read(name) \ No newline at end of file + return name, zf.read(name) diff --git a/src/calibre/parallel.py b/src/calibre/parallel.py index 90a2969c86..cb14c4ed20 100644 --- a/src/calibre/parallel.py +++ b/src/calibre/parallel.py @@ -43,7 +43,7 @@ PARALLEL_FUNCS = { 'lrfviewer' : ('calibre.gui2.lrf_renderer.main', 'main', {}, None), - + 'ebook-viewer' : ('calibre.gui2.viewer.main', 'main', {}, None), @@ -52,34 +52,34 @@ PARALLEL_FUNCS = { 'render_table' : ('calibre.ebooks.lrf.html.table_as_image', 'do_render', {}, None), - + 'render_pages' : - ('calibre.ebooks.lrf.comic.convert_from', 'render_pages', {}, 'notification'), + ('calibre.ebooks.comic.input', 'render_pages', {}, 'notification'), 'comic2lrf' : ('calibre.ebooks.lrf.comic.convert_from', 'do_convert', {}, 'notification'), - + 'any2epub' : ('calibre.ebooks.epub.from_any', 'any2epub', {}, None), - + 'feeds2epub' : ('calibre.ebooks.epub.from_feeds', 'main', {}, 'notification'), - + 'comic2epub' : ('calibre.ebooks.epub.from_comic', 'convert', {}, 'notification'), - + 'any2mobi' : ('calibre.ebooks.mobi.from_any', 'any2mobi', {}, None), - + 'any2pdf' : - ('calibre.ebooks.pdf.from_any', 'any2pdf', {}, None), - + ('calibre.ebooks.pdf.from_any', 'any2pdf', {}, None), + 'feeds2mobi' : ('calibre.ebooks.mobi.from_feeds', 'main', {}, 'notification'), - + 'comic2mobi' : ('calibre.ebooks.mobi.from_comic', 'convert', {}, 'notification'), - + 'ebook-convert' : ('calibre.ebooks.conversion.cli', 'main', {}, None), } @@ -174,7 +174,7 @@ class WorkerMother(object): contents = os.path.join(contents, 'console.app', 'Contents') self.executable = os.path.join(contents, 'MacOS', os.path.basename(sys.executable)) - + resources = os.path.join(contents, 'Resources') fd = os.path.join(contents, 'Frameworks') sp = os.path.join(resources, 'lib', 'python'+sys.version[:3], 'site-packages.zip') @@ -198,7 +198,7 @@ class WorkerMother(object): for func in ('spawn_free_spirit', 'spawn_worker'): setattr(self, func, getattr(self, func+'_'+ext)) - + def cleanup_child_windows(self, child, name=None, fd=None): try: child.kill() @@ -526,8 +526,8 @@ class JobKilled(Exception): pass class Job(object): - - def __init__(self, job_done, job_manager=None, + + def __init__(self, job_done, job_manager=None, args=[], kwargs={}, description=None): self.args = args self.kwargs = kwargs @@ -540,9 +540,9 @@ class Job(object): self.description = description self.start_time = None self.running_time = None - + self.result = self.exception = self.traceback = self.log = None - + def __cmp__(self, other): sstatus, ostatus = self.status(), other.status() if sstatus == ostatus or (self.has_run and other.has_run): @@ -557,8 +557,8 @@ class Job(object): return -1 if ostatus == 'WAITING': return 1 - - + + def job_done(self): self.is_running, self.has_run = False, True self.running_time = (time.time() - self.start_time) if \ @@ -566,14 +566,14 @@ class Job(object): if self.job_manager is not None: self.job_manager.job_done(self) self._job_done(self) - + def start_work(self): self.is_running = True self.has_run = False self.start_time = time.time() if self.job_manager is not None: self.job_manager.start_work(self) - + def update_status(self, percent, msg=None): self.percent = percent self.msg = msg @@ -582,7 +582,7 @@ class Job(object): self.job_manager.status_update(self) except: traceback.print_exc() - + def status(self): if self.is_running: return 'WORKING' @@ -592,7 +592,7 @@ class Job(object): if self.exception is None: return 'DONE' return 'ERROR' - + def console_text(self): ans = [u'Job: '] if self.description: @@ -610,13 +610,13 @@ class Job(object): if self.traceback: ans.append(u'**Traceback**:') ans.extend(self.traceback.split('\n')) - + if self.log: if isinstance(self.log, str): self.log = unicode(self.log, 'utf-8', 'replace') ans.append(self.log) return (u'\n'.join(ans)).encode('utf-8') - + def gui_text(self): ans = [u'Job: '] if self.description: @@ -641,19 +641,19 @@ class Job(object): if isinstance(self.log, str): self.log = unicode(self.log, 'utf-8', 'replace') ans.extend(self.log.split('\n')) - + ans = [x.decode(preferred_encoding, 'replace') if isinstance(x, str) else x for x in ans] - + return u'
'.join(ans) class ParallelJob(Job): - + def __init__(self, func, *args, **kwargs): Job.__init__(self, *args, **kwargs) self.func = func self.done = self.job_done - + def output(self, msg): if not self.log: self.log = u'' @@ -663,7 +663,7 @@ class ParallelJob(Job): self.log += msg if self.job_manager is not None: self.job_manager.output(self) - + def remove_ipc_socket(path): os = __import__('os') @@ -702,7 +702,7 @@ class Server(Thread): self.result_lock = RLock() self.pool_lock = RLock() self.start() - + def split(self, tasks): ''' Split a list into a list of sub lists, with the number of sub lists being @@ -720,7 +720,7 @@ class Server(Thread): ans.append(section) pos += delta return ans - + def close(self): try: @@ -733,7 +733,7 @@ class Server(Thread): self.jobs.append(job) if job.job_manager is not None: job.job_manager.add_job(job) - + def poll(self): ''' Return True if the server has either working or queued jobs @@ -741,14 +741,14 @@ class Server(Thread): with self.job_lock: with self.working_lock: return len(self.jobs) + len(self.working) > 0 - + def wait(self, sleep=1): ''' Wait until job queue is empty ''' while self.poll(): time.sleep(sleep) - + def run(self): while True: job = None @@ -935,7 +935,7 @@ def work(client_socket, func, args, kwdargs): func(*args, **kwargs) except (Exception, SystemExit): continue - + time.sleep(5) # Give any in progress BufferedSend time to complete @@ -948,7 +948,7 @@ def worker(host, port): if msg != 'OK': return 1 write(client_socket, 'WAITING') - + sys.stdout = BufferedSender(client_socket) sys.stderr = sys.stdout From 0749f44979ea69ab05109fbad777331650ba0658 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Apr 2009 23:00:47 -0700 Subject: [PATCH 7/9] IGN:... --- src/calibre/customize/conversion.py | 8 ++++++ src/calibre/ebooks/comic/input.py | 6 +++++ src/calibre/ebooks/pdf/output.py | 13 +++++---- src/calibre/ebooks/pdf/writer.py | 41 ++++++++++++++--------------- 4 files changed, 42 insertions(+), 26 deletions(-) diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index c358986d18..7573dddeac 100644 --- a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -133,6 +133,14 @@ class InputFormatPlugin(Plugin): #: (option_name, recommended_value, recommendation_level) recommendations = set([]) + def get_images(self): + ''' + Return a list of absolute paths to the images, if this input plugin + represents an image collection. The list of images is in the same order + as the spine and the TOC. + ''' + raise NotImplementedError() + def convert(self, stream, options, file_ext, log, accelerators): ''' This method must be implemented in sub-classes. It must return diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py index f6d6557ee4..82070bbc72 100755 --- a/src/calibre/ebooks/comic/input.py +++ b/src/calibre/ebooks/comic/input.py @@ -357,6 +357,9 @@ class ComicInput(InputFormatPlugin): thumbnail = None return new_pages + def get_images(self): + return self._images + def convert(self, stream, opts, file_ext, log, accelerators, progress=lambda p, m : m): from calibre.ebooks.metadata import MetaInformation @@ -401,6 +404,9 @@ class ComicInput(InputFormatPlugin): spine = [] for comic in comics: spine.extend(map(href, comic[2])) + self._images = [] + for comic in comics: + self._images.extend(comic[1]) opf.create_spine(spine) toc = TOC() if len(comics) == 1: diff --git a/src/calibre/ebooks/pdf/output.py b/src/calibre/ebooks/pdf/output.py index 20ba5028b0..7b8b0323ab 100644 --- a/src/calibre/ebooks/pdf/output.py +++ b/src/calibre/ebooks/pdf/output.py @@ -40,7 +40,7 @@ class PDFOutput(OutputFormatPlugin): OptionRecommendation(name='margin_right', recommended_value='1', level=OptionRecommendation.LOW, help=_('The right margin around the document.')), - + OptionRecommendation(name='unit', recommended_value='inch', level=OptionRecommendation.LOW, short_switch='u', choices=UNITS.keys(), help=_('The unit of measure. Default is inch. Choices ' @@ -58,15 +58,18 @@ class PDFOutput(OutputFormatPlugin): help=_('The orientation of the page. Default is portrait. Choices ' 'are %s' % ORIENTATIONS.keys())), ]) - + def convert(self, oeb_book, output_path, input_plugin, opts, log): + self.opts, self.log = opts, log + if input_plugin.is_image_collection: + self.convert_images(input_plugin.get_images()) with TemporaryDirectory('_pdf_out') as oebdir: OEBOutput(None).convert(oeb_book, oebdir, input_plugin, opts, log) opf = glob.glob(os.path.join(oebdir, '*.opf'))[0] writer = PDFWriter(log, opts) - + close = False if not hasattr(output_path, 'write'): close = True @@ -75,10 +78,10 @@ class PDFOutput(OutputFormatPlugin): out_stream = open(output_path, 'wb') else: out_stream = output_path - + out_stream.seek(0) out_stream.truncate() writer.dump(opf, out_stream, PDFMetadata(oeb_book.metadata)) - + if close: out_stream.close() diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index f91dae44fd..7a9973c6d7 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -9,12 +9,11 @@ __docformat__ = 'restructuredtext en' Write content to PDF. ''' -import os, shutil, sys +import os, shutil from calibre.ptempfile import PersistentTemporaryDirectory -from calibre.customize.profiles import OutputProfile from calibre.ebooks.pdf.pageoptions import unit, paper_size, \ - orientation, size + orientation, size from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata.opf2 import OPF @@ -24,12 +23,12 @@ from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, \ from PyQt4.QtWebKit import QWebView from pyPdf import PdfFileWriter, PdfFileReader - + class PDFMetadata(object): def __init__(self, oeb_metadata=None): self.title = _('Unknown') self.author = _('Unknown') - + if oeb_metadata != None: if len(oeb_metadata.title) >= 1: self.title = oeb_metadata.title[0].value @@ -42,16 +41,16 @@ class PDFWriter(QObject): if QApplication.instance() is None: QApplication([]) QObject.__init__(self) - + self.logger = log - + self.loop = QEventLoop() self.view = QWebView() self.connect(self.view, SIGNAL('loadFinished(bool)'), self._render_html) self.render_queue = [] self.combine_queue = [] self.tmp_path = PersistentTemporaryDirectory('_pdf_output_parts') - + self.custom_size = None if opts.custom_size != None: width, sep, height = opts.custom_size.partition('x') @@ -62,44 +61,44 @@ class PDFWriter(QObject): self.custom_size = (width, height) except: self.custom_size = None - + self.opts = opts - + def dump(self, opfpath, out_stream, pdf_metadata): self.metadata = pdf_metadata self._delete_tmpdir() - + opf = OPF(opfpath, os.path.dirname(opfpath)) self.render_queue = [i.path for i in opf.spine] self.combine_queue = [] self.out_stream = out_stream - + QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection) self.loop.exec_() - + @QtCore.pyqtSignature('_render_book()') def _render_book(self): if len(self.render_queue) == 0: self._write() else: self._render_next() - + def _render_next(self): item = str(self.render_queue.pop(0)) self.combine_queue.append(os.path.join(self.tmp_path, '%i.pdf' % (len(self.combine_queue) + 1))) - + self.logger.info('Processing %s...' % item) - + self.view.load(QUrl(item)) def _render_html(self, ok): if ok: item_path = os.path.join(self.tmp_path, '%i.pdf' % len(self.combine_queue)) - + self.logger.debug('\tRendering item %s as %i' % (os.path.basename(str(self.view.url().toLocalFile())), len(self.combine_queue))) - + printer = QPrinter(QPrinter.HighResolution) - + if self.opts.output_profile.short_name == 'default': if self.custom_size == None: printer.setPaperSize(paper_size(self.opts.paper_size)) @@ -107,7 +106,7 @@ class PDFWriter(QObject): printer.setPaperSize(QSizeF(self.custom_size[0], self.custom_size[1]), unit(self.opts.unit)) else: printer.setPaperSize(QSizeF(self.opts.output_profile.width / self.opts.output_profile.dpi, self.opts.output_profile.height / self.opts.output_profile.dpi), QPrinter.Inch) - + printer.setPageMargins(size(self.opts.margin_left), size(self.opts.margin_top), size(self.opts.margin_right), size(self.opts.margin_bottom), unit(self.opts.unit)) printer.setOrientation(orientation(self.opts.orientation)) printer.setOutputFormat(QPrinter.PdfFormat) @@ -122,7 +121,7 @@ class PDFWriter(QObject): def _write(self): self.logger.info('Combining individual PDF parts...') - + try: outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author) for item in self.combine_queue: From 996dda3ffea65144cdb62fcd0e2c8c231f4f2325 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 27 Apr 2009 12:02:18 -0700 Subject: [PATCH 8/9] Fix regression in LIT metadata reader --- src/calibre/ebooks/lit/reader.py | 3 +++ src/calibre/ebooks/metadata/lit.py | 9 +++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 79249fe7c3..37328328b7 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -882,6 +882,9 @@ class LitContainer(object): unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP) return str(unbin) + def get_metadata(self): + return self._read_meta() + class LitReader(OEBReader): Container = LitContainer diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py index 2a57d2f2d2..0a37b6c768 100644 --- a/src/calibre/ebooks/metadata/lit.py +++ b/src/calibre/ebooks/metadata/lit.py @@ -4,15 +4,16 @@ __copyright__ = '2008, Kovid Goyal ' Support for reading the metadata from a LIT file. ''' -import sys, cStringIO, os +import cStringIO, os from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.opf2 import OPF -from calibre.ebooks.lit.reader import LitReader def get_metadata(stream): - litfile = LitReader(stream) - src = litfile.meta.encode('utf-8') + from calibre.ebooks.lit.reader import LitContainer + litfile = LitContainer(stream) + src = litfile.get_metadata().encode('utf-8') + litfile = litfile._litfile opf = OPF(cStringIO.StringIO(src), os.getcwd()) mi = MetaInformation(opf) covers = [] From 2da5589964160c991f13ecfaea1b84b6ce93a92a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 27 Apr 2009 15:41:10 -0700 Subject: [PATCH 9/9] Input plugin for recipes --- src/calibre/customize/builtins.py | 3 +- src/calibre/ebooks/conversion/cli.py | 5 +- src/calibre/ebooks/conversion/plumber.py | 18 +++++- src/calibre/ebooks/oeb/base.py | 8 +-- src/calibre/ebooks/oeb/transforms/split.py | 1 + src/calibre/web/__init__.py | 3 +- src/calibre/web/feeds/input.py | 65 ++++++++++++++++++++++ src/calibre/web/feeds/news.py | 36 +++++------- 8 files changed, 108 insertions(+), 31 deletions(-) create mode 100644 src/calibre/web/feeds/input.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 9a686e0d94..dcbffade92 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -287,6 +287,7 @@ from calibre.ebooks.odt.input import ODTInput from calibre.ebooks.rtf.input import RTFInput from calibre.ebooks.html.input import HTMLInput from calibre.ebooks.comic.input import ComicInput +from calibre.web.feeds.input import RecipeInput from calibre.ebooks.oeb.output import OEBOutput from calibre.ebooks.epub.output import EPUBOutput from calibre.ebooks.txt.output import TXTOutput @@ -296,7 +297,7 @@ from calibre.customize.profiles import input_profiles, output_profiles plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput, TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput, - FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput] + FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput, RecipeInput] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ x.__name__.endswith('MetadataReader')] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 941a1ec5fc..d8de702915 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -52,7 +52,7 @@ def check_command_line_options(parser, args, log): raise SystemExit(1) input = os.path.abspath(args[1]) - if not os.access(input, os.R_OK): + if not input.endswith('.recipe') and not os.access(input, os.R_OK): log.error('Cannot read from', input) raise SystemExit(1) @@ -169,6 +169,9 @@ def add_pipeline_options(parser, plumber): if rec.level < rec.HIGH: option_recommendation_to_cli_option(add_option, rec) + option_recommendation_to_cli_option(parser.add_option, + plumber.get_option_by_name('list_recipes')) + def option_parser(): return OptionParser(usage=USAGE) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index da9c9f11e2..1ef58e1d95 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -360,6 +360,10 @@ OptionRecommendation(name='book_producer', OptionRecommendation(name='language', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the language.')), + +OptionRecommendation(name='list_recipes', + recommended_value=False, help=_('List available recipes.')), + ] input_fmt = os.path.splitext(self.input)[1] @@ -525,6 +529,13 @@ OptionRecommendation(name='language', self.setup_options() if self.opts.verbose: self.log.filter_level = self.log.DEBUG + if self.opts.list_recipes: + from calibre.web.feeds.recipes import titles + self.log('Available recipes:') + for title in sorted(titles): + self.log('\t'+title) + self.log('%d recipes available'%len(titles)) + raise SystemExit(0) # Run any preprocess plugins from calibre.customize.ui import run_plugins_on_preprocess @@ -535,8 +546,13 @@ OptionRecommendation(name='language', accelerators = {} tdir = PersistentTemporaryDirectory('_plumber') + stream = self.input if self.input_fmt == 'recipe' else \ + open(self.input, 'rb') - self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts, + if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf': + self.opts.lrf = True + + self.oeb = self.input_plugin(stream, self.opts, self.input_fmt, self.log, accelerators, tdir) if self.opts.debug_input is not None: diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 9d8598c766..f5395e04fe 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -1578,15 +1578,15 @@ class OEBBook(object): return data.decode('utf-16') except UnicodeDecodeError: pass - try: - return data.decode('utf-8') - except UnicodeDecodeError: - pass if self.encoding is not None: try: return data.decode(self.encoding) except UnicodeDecodeError: pass + try: + return data.decode('utf-8') + except UnicodeDecodeError: + pass data, _ = xml_to_unicode(data) data = data.replace('\r\n', '\n') data = data.replace('\r', '\n') diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index 21d71da5bb..ec3d63192d 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -59,6 +59,7 @@ class Split(object): self.fix_links() def split_item(self, item): + page_breaks, page_break_ids = [], [] if self.split_on_page_breaks: page_breaks, page_break_ids = self.find_page_breaks(item) diff --git a/src/calibre/web/__init__.py b/src/calibre/web/__init__.py index cadf21c39f..b14dc0ce28 100644 --- a/src/calibre/web/__init__.py +++ b/src/calibre/web/__init__.py @@ -2,5 +2,6 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' +class Recipe(object): + pass - \ No newline at end of file diff --git a/src/calibre/web/feeds/input.py b/src/calibre/web/feeds/input.py new file mode 100644 index 0000000000..21324293d3 --- /dev/null +++ b/src/calibre/web/feeds/input.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import os + +from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation + +class RecipeInput(InputFormatPlugin): + + name = 'Recipe Input' + author = 'Kovid Goyal' + description = _('Download periodical content from the internet') + file_types = set(['recipe']) + + recommendations = set([ + ('chapter_mark', 'none', OptionRecommendation.HIGH), + ('dont_split_on_page_breaks', True, OptionRecommendation.HIGH), + ('use_auto_toc', False, OptionRecommendation.HIGH), + ]) + + options = set([ + OptionRecommendation(name='test', recommended_value=False, + help=_('Useful for recipe development. Forces ' + 'max_articles_per_feed to 2 and downloads at most 2 feeds.')), + OptionRecommendation(name='username', recommended_value=None, + help=_('Username for sites that require a login to access ' + 'content.')), + OptionRecommendation(name='password', recommended_value=None, + help=_('Password for sites that require a login to access ' + 'content.')), + OptionRecommendation(name='lrf', recommended_value=False, + help='Optimize fetching for subsequent conversion to LRF.'), + ]) + + def convert(self, recipe_or_file, opts, file_ext, log, + accelerators, progress=lambda x, y: x): + from calibre.web.feeds.recipes import \ + get_builtin_recipe, compile_recipe + if os.access(recipe_or_file, os.R_OK): + recipe = compile_recipe(open(recipe_or_file, 'rb').read()) + else: + title = os.path.basename(recipe_or_file).rpartition('.')[0] + recipe = get_builtin_recipe(title) + + if recipe is None: + raise ValueError('%s is not a valid recipe file or builtin recipe' % + recipe_or_file) + + ro = recipe(opts, log, progress) + ro.download() + + opts.output_profile.flow_size = 0 + + for f in os.listdir('.'): + if f.endswith('.opf'): + return os.path.abspath(f) + + + + diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 6a248b6992..216a827326 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -20,6 +20,7 @@ from calibre import browser, __appname__, iswindows, \ from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.lrf import entity_to_unicode +from calibre.web import Recipe from calibre.ebooks import render_html from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata import MetaInformation @@ -27,12 +28,11 @@ from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed from calibre.web.fetch.simple import option_parser as web2disk_option_parser from calibre.web.fetch.simple import RecursiveFetcher from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending -from calibre.utils.logging import Log from calibre.ptempfile import PersistentTemporaryFile, \ PersistentTemporaryDirectory -class BasicNewsRecipe(object): +class BasicNewsRecipe(Recipe): ''' Abstract base class that contains logic needed in all feed fetchers. ''' @@ -443,40 +443,34 @@ class BasicNewsRecipe(object): ''' raise NotImplementedError - def __init__(self, options, parser, progress_reporter): + def __init__(self, options, log, progress_reporter): ''' Initialize the recipe. :param options: Parsed commandline options :param parser: Command line option parser. Used to intelligently merge options. :param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional. ''' - self.log = Log() - if options.verbose: - self.log.filter_level = self.log.DEBUG + self.log = log if not isinstance(self.title, unicode): self.title = unicode(self.title, 'utf-8', 'replace') - for attr in ('username', 'password', 'lrf', 'output_dir', 'verbose', 'debug', 'test'): - setattr(self, attr, getattr(options, attr)) + self.debug = options.verbose > 1 + self.output_dir = os.getcwd() + self.verbose = options.verbose + self.test = options.test + self.username = options.username + self.password = options.password + self.lrf = options.lrf + self.output_dir = os.path.abspath(self.output_dir) if options.test: self.max_articles_per_feed = 2 self.simultaneous_downloads = min(4, self.simultaneous_downloads) - if self.debug: self.verbose = True self.report_progress = progress_reporter - self.username = self.password = None - #: If True optimize downloading for eventual conversion to LRF - self.lrf = False - defaults = parser.get_default_values() - - for opt in options.__dict__.keys(): - if getattr(options, opt) != getattr(defaults, opt, None): - setattr(self, opt, getattr(options, opt)) - if isinstance(self.feeds, basestring): self.feeds = eval(self.feeds) if isinstance(self.feeds, basestring): @@ -493,7 +487,6 @@ class BasicNewsRecipe(object): '--timeout', str(self.timeout), '--max-recursions', str(self.recursions), '--delay', str(self.delay), - '--timeout', str(self.timeout), ] if self.encoding is not None: web2disk_cmdline.extend(['--encoding', self.encoding]) @@ -520,9 +513,6 @@ class BasicNewsRecipe(object): self.simultaneous_downloads = 1 self.navbar = templates.NavBarTemplate() - self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header', '--encoding', 'utf-8']) - if '--base-font-size' not in self.html2lrf_options: - self.html2lrf_options.extend(['--base-font-size', '12']) self.failed_downloads = [] self.partial_failures = [] @@ -557,7 +547,7 @@ class BasicNewsRecipe(object): return self.postprocess_html(soup, first_fetch) - def download(self, for_lrf=False): + def download(self): ''' Download and pre-process all articles from the feeds in this recipe. This method should be called only one on a particular Recipe instance.