From 14891bdd52d08a298a0557c312176f91b7a1e0a5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Mar 2009 10:25:00 -0700 Subject: [PATCH 01/58] New recipe for Wikinews by Darko Miletic --- src/calibre/gui2/images/news/wikinews_en.png | Bin 0 -> 951 bytes src/calibre/web/feeds/recipes/__init__.py | 2 +- .../web/feeds/recipes/recipe_wikinews_en.py | 70 ++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 src/calibre/gui2/images/news/wikinews_en.png create mode 100644 src/calibre/web/feeds/recipes/recipe_wikinews_en.py diff --git a/src/calibre/gui2/images/news/wikinews_en.png b/src/calibre/gui2/images/news/wikinews_en.png new file mode 100644 index 0000000000000000000000000000000000000000..489061b923e207b18d5dfee4e08ab715d198d7fc GIT binary patch literal 951 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?>V-x;TbdoX(wmzDp)l;%NQ%YkNbl{$G_o z!))?PHNOHY-;yJ)i(VX4eBk9+u=w4XQ|u>%c|Nn`u{sLOcj#$S;AmRS={9k}OG)-Q z8P;lwGyjE`u70<+I+jJWH@))5Im?0{eLH^K)BjVFJNe>035J=fG4<8DXM7fHF%|IG zqU6nbxGwsG{GV=%IcJ>XvXquy&{|Y9%|Rj1@Z8lMw=VYR9bdf<-O5KErU-h?7FsUoIv7GtjO=1c4pPAgR36>*pl+P=fP_0Y0Z%*wVw-@u^IZC=yk&Rq)-gC%3ndHyMCAREXxat1$S`RZ~{-J_$XT zV4C{B;$ZLOS-+1Jm%p4LI`3@XqgPA$o*Q`@=LJkP&CHRrz4YB(O{K6&+*Eyeu-%@( zg3m`bP1Lk$JUey9>gl^H!Z%zzJ3;YKh-!Xajn0#dd>`YjGgc+dls>nvWaqZcr#`2f z9{XL{2#lG1k!K{h}%VamM zS+G!wLw=#MFNY;C75sTWW15?==TUxj)%ZJhPp@`M-hQonAh_CHdux5SyfK5s+uoml z?$*r^V+^)zX>ifr#d|PY-bZftf8k=A=-tbkBGZ5gU$w+Fq9nN}HL)aBHw8#A7#SE? 
z>KYj98d!uFnp+teSQ%Mp8yHv_7?>V?#0%4qo1c=IR*9*>%*x2j%Fqm=A;rhh6{vy1 M)78&qol`;+0E-%zP5=M^ literal 0 HcmV?d00001 diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index a513f34728..8253021c57 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -33,7 +33,7 @@ recipe_modules = ['recipe_' + r for r in ( 'la_republica', 'physics_today', 'chicago_tribune', 'e_novine', 'al_jazeera', 'winsupersite', 'borba', 'courrierinternational', 'lamujerdemivida', 'soldiers', 'theonion', 'news_times', - 'el_universal', 'mediapart', + 'el_universal', 'mediapart', 'wikinews_en', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_wikinews_en.py b/src/calibre/web/feeds/recipes/recipe_wikinews_en.py new file mode 100644 index 0000000000..932981ca4c --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_wikinews_en.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +en.wikinews.org +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class WikiNews(BasicNewsRecipe): + title = 'Wikinews' + __author__ = 'Darko Miletic' + description = 'News from wikipedia' + category = 'news, world' + oldest_article = 7 + max_articles_per_feed = 100 + publisher = 'Wiki' + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + remove_javascript = True + language = _('English') + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + keep_only_tags = [ + dict(name='h1', attrs={'id':'firstHeading'}) + ,dict(name='div', attrs={'id':'bodyContent'}) + ] + + remove_tags = [ + dict(name='link') + ,dict(name='div',attrs={'id':['printfooter','catlinks','footer']}) + ,dict(name='div',attrs={'class':['thumb left','thumb 
right']}) + ] + + remove_tags_after = dict(name='h2') + + feeds = [(u'News', u'http://feeds.feedburner.com/WikinewsLatestNews')] + + def get_article_url(self, article): + artl = article.get('link', None) + rest, sep, article_id = artl.rpartition('/') + return 'http://en.wikinews.org/wiki/' + article_id + + def print_version(self, url): + rest, sep, article_id = url.rpartition('/') + return 'http://en.wikinews.org/w/index.php?title=' + article_id + '&printable=yes' + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + btag = soup.find('div',attrs={'id':'bodyContent'}) + for item in btag.findAll('div'): + item.extract() + for item in btag.findAll('h2'): + item.extract() + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll(font=True): + del item['font'] + return soup + From 1ee09b193012f76ed68fd0109048467e87935c6e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Mar 2009 10:43:28 -0700 Subject: [PATCH 02/58] Fix #2017 (Another PRC file cover detection issue) --- src/calibre/ebooks/mobi/reader.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 2c80cc1c8c..3ca1fd6c18 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -370,6 +370,11 @@ class MobiReader(object): opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1) elif mi.cover is not None: opf.cover = mi.cover + else: + opf.cover = 'images/%05d.jpg'%1 + if not os.path.exists(os.path.join(os.path.dirname(htmlfile), + *opf.cover.split('/'))): + opf.cover = None manifest = [(htmlfile, 'text/x-oeb1-document')] bp = os.path.dirname(htmlfile) for i in getattr(self, 'image_names', []): From 0f1414679eb0ba81b5428038e7024b404653cf04 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Mar 2009 10:53:11 -0700 Subject: [PATCH 03/58] Fix #2018 ([ERROR] CSSValue: Missing token for production Choice) --- src/calibre/ebooks/html.py | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index 2883e39f8a..0b4f69b38b 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -859,7 +859,7 @@ class Processor(Parser): except ValueError: setting = '' face = font.attrib.pop('face', None) - if face is not None: + if face: faces = [] for face in face.split(','): face = face.strip() From 05eea1baf8c9764b0421153519990ce1f9de3a9a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Mar 2009 12:30:50 -0700 Subject: [PATCH 04/58] Remove dependency on help2man. Also significantly speds up man page generation --- src/calibre/__init__.py | 6 +- src/calibre/linux.py | 52 +++++---------- src/calibre/trac/plugins/download.py | 1 - src/calibre/trac/plugins/templates/linux.html | 2 +- src/calibre/utils/help2man.py | 63 +++++++++++++++++++ 5 files changed, 86 insertions(+), 38 deletions(-) create mode 100644 src/calibre/utils/help2man.py diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 706391f1a7..3bf1f03b42 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -2,11 +2,15 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import sys, os, re, logging, time, subprocess, atexit, mimetypes + +import sys, os, re, logging, time, subprocess, atexit, mimetypes, warnings from htmlentitydefs import name2codepoint from math import floor from logging import Formatter +warnings.simplefilter('ignore', DeprecationWarning) + + from PyQt4.QtCore import QUrl from PyQt4.QtGui import QDesktopServices from calibre.startup import plugins, winutil, winutilerror diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 7abd9c027c..039d52985a 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -410,45 +410,27 @@ def option_parser(): help='Save a manifest of all installed files to the specified location') return parser -def 
install_man_pages(fatal_errors): - from bz2 import compress - import subprocess +def install_man_pages(fatal_errors, use_destdir=False): + from calibre.utils.help2man import create_man_page + prefix = os.environ.get('DESTDIR', '/') if use_destdir else '/' + manpath = os.path.join(prefix, 'usr/share/man/man1') + if not os.path.exists(manpath): + os.makedirs(manpath) print 'Installing MAN pages...' - manpath = '/usr/share/man/man1' - f = NamedTemporaryFile() - f.write('[see also]\nhttp://%s.kovidgoyal.net\n'%__appname__) - f.flush() manifest = [] - os.environ['PATH'] += ':'+os.path.expanduser('~/bin') for src in entry_points['console_scripts']: - prog = src[:src.index('=')].strip() - if prog in ('prs500', 'pdf-meta', 'epub-meta', 'lit-meta', - 'markdown-calibre', 'calibre-debug', 'fb2-meta', - 'calibre-fontconfig', 'calibre-parallel', 'odt-meta', - 'rb-meta', 'imp-meta', 'mobi-meta'): + prog, right = src.split('=') + prog = prog.strip() + module = __import__(right.split(':')[0].strip(), fromlist=['a']) + parser = getattr(module, 'option_parser', None) + if parser is None: continue - - help2man = ('help2man', prog, '--name', 'part of %s'%__appname__, - '--section', '1', '--no-info', '--include', - f.name, '--manual', __appname__) + parser = parser() + raw = create_man_page(prog, parser) manfile = os.path.join(manpath, prog+'.1'+__appname__+'.bz2') print '\tInstalling MAN page for', prog - try: - p = subprocess.Popen(help2man, stdout=subprocess.PIPE) - except OSError, err: - import errno - if err.errno != errno.ENOENT: - raise - print 'Failed to install MAN pages as help2man is missing from your system' - break - o = p.stdout.read() - raw = re.compile(r'^\.IP\s*^([A-Z :]+)$', re.MULTILINE).sub(r'.SS\n\1', o) - if not raw.strip(): - print 'Unable to create MAN page for', prog - continue - f2 = open_file(manfile) - manifest.append(f2.name) - f2.write(compress(raw)) + open(manfile, 'wb').write(raw) + manifest.append(manfile) return manifest def post_install(): @@ -460,9 
+442,9 @@ def post_install(): manifest = [] setup_desktop_integration(opts.fatal_errors) if opts.no_root or os.geteuid() == 0: + manifest += install_man_pages(opts.fatal_errors, use_destdir) manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors) - manifest += setup_completion(opts.fatal_errors) - manifest += install_man_pages(opts.fatal_errors) + manifest += setup_completion(opts.fatal_errors) else: print "Skipping udev, completion, and man-page install for non-root user." diff --git a/src/calibre/trac/plugins/download.py b/src/calibre/trac/plugins/download.py index 9c852c554e..020c0a0e3d 100644 --- a/src/calibre/trac/plugins/download.py +++ b/src/calibre/trac/plugins/download.py @@ -18,7 +18,6 @@ DEPENDENCIES = [ ('lxml', '2.1.5', 'lxml', 'python-lxml', 'python-lxml'), ('python-dateutil', '1.4.1', 'python-dateutil', 'python-dateutil', 'python-dateutil'), ('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'), - ('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'), ] diff --git a/src/calibre/trac/plugins/templates/linux.html b/src/calibre/trac/plugins/templates/linux.html index 066f3c9b6d..96881aa108 100644 --- a/src/calibre/trac/plugins/templates/linux.html +++ b/src/calibre/trac/plugins/templates/linux.html @@ -88,7 +88,7 @@ sudo python -c "import urllib2; exec urllib2.urlopen('http://calibre.kovidgoyal. be ignored.
  • - You must have help2man and xdg-utils installed + You must have xdg-utils installed on your system before running the installer.
  • diff --git a/src/calibre/utils/help2man.py b/src/calibre/utils/help2man.py new file mode 100644 index 0000000000..603c0d6484 --- /dev/null +++ b/src/calibre/utils/help2man.py @@ -0,0 +1,63 @@ +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import time, bz2 + +from calibre.constants import __version__, __appname__, __author__ + + +def create_man_page(prog, parser): + usage = parser.usage.splitlines() + for i, line in enumerate(list(usage)): + if not line.strip(): + usage[i] = '.PP' + else: + usage[i] = line.replace('%prog', prog) + lines = [ + '.TH ' + prog.upper() + ' "1" ' + time.strftime('"%B %Y"') + + ' "%s (%s %s)" "%s"'%(prog, __appname__, __version__, __appname__), + '.SH NAME', + prog + r' \- part of '+__appname__, + '.SH SYNOPSIS', + '.B "%s"'%prog + r'\fR '+' '.join(usage[0].split()[1:]), + '.SH DESCRIPTION', + ] + lines += usage[1:] + + lines += [ + '.SH OPTIONS' + ] + def format_option(opt): + ans = ['.TP'] + opts = [] + opts += opt._short_opts + opts.append(opt.get_opt_string()) + opts = [r'\fB'+x.replace('-', r'\-')+r'\fR' for x in opts] + ans.append(', '.join(opts)) + help = opt.help if opt.help else '' + ans.append(help.replace('%prog', prog).replace('%default', str(opt.default))) + return ans + + for opt in parser.option_list: + lines.extend(format_option(opt)) + for group in parser.option_groups: + lines.append('.SS '+group.title) + for opt in group.option_list: + lines.extend(format_option(opt)) + + lines += ['.SH SEE ALSO', + 'The User Manual is available at ' + 'http://calibre.kovidgoyal.net/user_manual', + '.PP', '.B Created by '+__author__] + + return bz2.compress('\n'.join(lines)) + +def main(): + from calibre.ebooks.epub.from_any import option_parser + open('/tmp/any2epub.1calibre.bz2', 'w').write(create_man_page( + 'any2epub', option_parser())) + +if __name__ == '__main__': + main() \ No newline at end of file From 
05329f8ca130b2713da70fd1eb07004e19368305 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Mar 2009 13:44:11 -0700 Subject: [PATCH 05/58] IGN:... --- src/calibre/utils/help2man.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/calibre/utils/help2man.py b/src/calibre/utils/help2man.py index 603c0d6484..9777ea24cd 100644 --- a/src/calibre/utils/help2man.py +++ b/src/calibre/utils/help2man.py @@ -44,6 +44,8 @@ def create_man_page(prog, parser): lines.extend(format_option(opt)) for group in parser.option_groups: lines.append('.SS '+group.title) + if group.description: + lines.extend(['.PP', group.description]) for opt in group.option_list: lines.extend(format_option(opt)) @@ -54,10 +56,4 @@ def create_man_page(prog, parser): return bz2.compress('\n'.join(lines)) -def main(): - from calibre.ebooks.epub.from_any import option_parser - open('/tmp/any2epub.1calibre.bz2', 'w').write(create_man_page( - 'any2epub', option_parser())) - -if __name__ == '__main__': - main() \ No newline at end of file + From 4e128c10736b2695976812b6d4ca893f152a995e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Mar 2009 13:45:54 -0700 Subject: [PATCH 06/58] Commit so I can pull from trunk --- src/calibre/__init__.py | 19 --- src/calibre/customize/builtins.py | 3 +- src/calibre/customize/conversion.py | 47 +++++++- src/calibre/customize/profiles.py | 36 +++++- src/calibre/customize/ui.py | 24 +++- src/calibre/ebooks/conversion/cli.py | 146 +++++++++++++++++++++++ src/calibre/ebooks/conversion/plumber.py | 73 +++++++++++- src/calibre/ebooks/html.py | 7 +- src/calibre/ebooks/mobi/input.py | 7 +- src/calibre/ebooks/oeb/output.py | 17 +++ src/calibre/linux.py | 1 + src/calibre/utils/logging.py | 28 +++-- 12 files changed, 356 insertions(+), 52 deletions(-) create mode 100644 src/calibre/ebooks/conversion/cli.py create mode 100644 src/calibre/ebooks/oeb/output.py diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index de133ddb57..030aab8317 
100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -7,7 +7,6 @@ import sys, os, re, logging, time, subprocess, atexit, mimetypes, \ __builtin__.__dict__['dynamic_property'] = lambda(func): func(None) from htmlentitydefs import name2codepoint from math import floor -from logging import Formatter from PyQt4.QtCore import QUrl from PyQt4.QtGui import QDesktopServices @@ -318,24 +317,6 @@ def english_sort(x, y): ''' return cmp(_spat.sub('', x), _spat.sub('', y)) -class ColoredFormatter(Formatter): - - def format(self, record): - ln = record.__dict__['levelname'] - col = '' - if ln == 'CRITICAL': - col = terminal_controller.YELLOW - elif ln == 'ERROR': - col = terminal_controller.RED - elif ln in ['WARN', 'WARNING']: - col = terminal_controller.BLUE - elif ln == 'INFO': - col = terminal_controller.GREEN - elif ln == 'DEBUG': - col = terminal_controller.CYAN - record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL - return Formatter.format(self, record) - def walk(dir): ''' A nice interface to os.walk ''' for record in os.walk(dir): diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index fafe8e5afa..ca21bbb215 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -244,9 +244,10 @@ class MOBIMetadataWriter(MetadataWriterPlugin): from calibre.ebooks.epub.input import EPUBInput from calibre.ebooks.mobi.input import MOBIInput +from calibre.ebooks.oeb.output import OEBOutput from calibre.customize.profiles import input_profiles -plugins = [HTML2ZIP, EPUBInput, MOBIInput] +plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ x.__name__.endswith('MetadataReader')] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index aa7b0c1dea..f20cc4ae85 100644 --- 
a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -37,19 +37,24 @@ class ConversionOption(object): if not self.help: raise ValueError('You must set the help text') + def __hash__(self): + return hash(self.name) + + def __eq__(self, other): + return hash(self) == hash(other) class OptionRecommendation(object): LOW = 1 MED = 2 HIGH = 3 - def __init__(self, recommeded_value, level=LOW, **kwargs): + def __init__(self, recommended_value=None, level=LOW, **kwargs): ''' An option recommendation. That is, an option as well as its recommended value and the level of the recommendation. ''' self.level = level - self.recommended_value = recommeded_value + self.recommended_value = recommended_value self.option = kwargs.pop('option', None) if self.option is None: self.option = ConversionOption(**kwargs) @@ -59,10 +64,12 @@ class OptionRecommendation(object): def validate_parameters(self): if self.option.choices and self.recommended_value not in \ self.option.choices: - raise ValueError('Recommended value not in choices') + raise ValueError('OpRec: %s: Recommended value not in choices'% + self.option.name) if not (isinstance(self.recommended_value, (int, float, str, unicode))\ - or self.default is None): - raise ValueError(unicode(self.default) + + or self.recommended_value is None): + raise ValueError('OpRec: %s:'%self.option.name + + repr(self.recommended_value) + ' is not a string or a number') @@ -186,4 +193,34 @@ class InputFormatPlugin(Plugin): return ret + + +class OutputFormatPlugin(Plugin): + ''' + OutputFormatPlugins are responsible for converting an OEB document + (OPF+HTML) into an output ebook. + + The OEB document can be assumed to be encoded in UTF-8. + The main action happens in :method:`convert`. 
+ ''' + + type = _('Conversion Output') + can_be_disabled = False + supported_platforms = ['windows', 'osx', 'linux'] + + #: The file type (extension without leading period) that this + #: plugin outputs + file_type = None + + #: Options shared by all Input format plugins. Do not override + #: in sub-classes. Use :member:`options` instead. Every option must be an + #: instance of :class:`OptionRecommendation`. + common_options = set([]) + + #: Options to customize the behavior of this plugin. Every option must be an + #: instance of :class:`OptionRecommendation`. + options = set([]) + + def convert(self, oeb_book, input_plugin, options, parse_cache, log): + raise NotImplementedError diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 002f56879f..a3a7e22298 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -3,6 +3,7 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import sys, re from calibre.customize import Plugin class InputProfile(Plugin): @@ -16,12 +17,43 @@ class InputProfile(Plugin): # inherit from this profile and override as needed name = 'Default Input Profile' - short_name = 'default' # Used in the CLI so dont spaces etc. in it + short_name = 'default' # Used in the CLI so dont use spaces etc. in it description = _('This profile tries to provide sane defaults and is useful ' 'if you know nothing about the input document.') input_profiles = [InputProfile] - +class OutputProfile(Plugin): + author = 'Kovid Goyal' + supported_platforms = set(['windows', 'osx', 'linux']) + can_be_disabled = False + type = _('Output profile') + + name = 'Default Output Profile' + short_name = 'default' # Used in the CLI so dont use spaces etc. 
in it + description = _('This profile tries to provide sane defaults and is useful ' + 'if you want to produce a document intended to be read at a ' + 'computer or on a range of devices.') + + epub_flow_size = sys.maxint + screen_size = None + remove_special_chars = False + remove_object_tags = False + +class SonyReader(OutputProfile): + + name = 'Sony Reader' + short_name = 'sony' + description = _('This profile is intended for the SONY PRS line. ' + 'The 500/505/700 etc.') + + epub_flow_size = 270000 + screen_size = (590, 765) + remove_special_chars = re.compile(u'[\u200b\u00ad]') + remove_object_tags = True + + + +output_profiles = [OutputProfile, SonyReader] \ No newline at end of file diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py index 1cdafae4f0..d8b7ebf6d8 100644 --- a/src/calibre/customize/ui.py +++ b/src/calibre/customize/ui.py @@ -6,8 +6,8 @@ import os, shutil, traceback, functools, sys from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \ MetadataWriterPlugin -from calibre.customize.conversion import InputFormatPlugin -from calibre.customize.profiles import InputProfile +from calibre.customize.conversion import InputFormatPlugin, OutputFormatPlugin +from calibre.customize.profiles import InputProfile, OutputProfile from calibre.customize.builtins import plugins as builtin_plugins from calibre.constants import __version__, iswindows, isosx from calibre.ebooks.metadata import MetaInformation @@ -76,6 +76,12 @@ def input_profiles(): if isinstance(plugin, InputProfile): yield plugin +def output_profiles(): + for plugin in _initialized_plugins: + if isinstance(plugin, OutputProfile): + yield plugin + + def reread_filetype_plugins(): global _on_import global _on_preprocess @@ -245,9 +251,19 @@ def input_format_plugins(): def plugin_for_input_format(fmt): for plugin in input_format_plugins(): - if fmt in plugin.file_types: + if fmt.lower() in plugin.file_types: return plugin - + +def output_format_plugins(): + 
for plugin in _initialized_plugins: + if isinstance(plugin, OutputFormatPlugin): + yield plugin + +def plugin_for_output_format(fmt): + for plugin in output_format_plugins(): + if fmt.lower() == plugin.file_type: + return plugin + def disable_plugin(plugin_or_name): x = getattr(plugin_or_name, 'name', plugin_or_name) diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py new file mode 100644 index 0000000000..174fa87a5d --- /dev/null +++ b/src/calibre/ebooks/conversion/cli.py @@ -0,0 +1,146 @@ +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +''' +Command line interface to conversion sub-system +''' + +USAGE = '%prog ' + _('''\ +input_file output_file [options] + +Convert an ebook from one format to another. + +input_file is the input and output_file is the output. Both must be +specified as the first two arguments to the command. + +The output ebook format is guessed from the file extension of +output_file. output_file can also be of the special format .EXT where +EXT is the output file extension. In this case, the name of the output +file is derived the name of the input file. Note that the filenames must +not start with a hyphen. Finally, if output_file has no extension, then +it is treated as a directory and an "open ebook" (OEB) consisting of HTML files +is written to that directory. These files are the files that would normally +have been passed to the output plugin. + + +After specifying the input +and output file you can customize the conversion by specifying various +options, listed below. 
+ +For full documentation of the conversion system see + +''') + 'http://calibre.kovidgoyal.net/user_manual/conversion.html' + +import sys, os + +from calibre.utils.config import OptionParser +from calibre.utils.logging import Log +from calibre.constants import preferred_encoding + +def print_help(parser, log): + help = parser.format_help().encode(preferred_encoding, 'replace') + log(help) + +def check_command_line_options(parser, args, log): + if len(args) < 3 or args[1].startswith('-') or args[2].startswith('-'): + print_help(parser) + log.error('\n\nYou must specify the input AND output files') + raise SystemExit(1) + + input = os.path.abspath(args[1]) + if not os.access(input, os.R_OK): + log.error('Cannot read from', input) + raise SystemExit(1) + + output = args[2] + if output.startswith('.'): + output = os.path.splitext(os.path.basename(input))[0]+output + output = os.path.abspath(output) + + if '.' in output: + if os.path.exists(output): + log.warn('WARNING:', output, 'exists. Deleting.') + os.remove(output) + + return input, output + +def option_recommendation_to_cli_option(add_option, rec): + opt = rec.option + switches = [opt.short_switch] if opt.short_switch else [] + switches.append(opt.long_switch) + add_option(opt.name, switches=switches, help=opt.help, + choices=opt.choices, default=rec.recommended_value) + +def add_input_output_options(parser, plumber): + input_options, output_options = \ + plumber.input_options, plumber.output_options + + def add_options(group, options): + for opt in options: + option_recommendation_to_cli_option(group, opt) + + if input_options: + io = parser.add_group(plumber.input_fmt.upper() + ' ' + _('OPTIONS')) + add_options(io, input_options) + + if output_options: + oo = parser.add_group(plumber.output_fmt.upper() + ' ' + _('OPTIONS')) + add_options(oo, output_options) + +def add_pipeline_options(parser, plumber): + groups = { + '' : ('', + [ + 'input_profile', + 'output_profile', + ] + ), + + 'DEBUG': (_('Options to help 
with debugging the conversion'), + [ + 'verbose', + ]), + + + } + + + for group, spec in groups.items(): + desc, options = spec + if group: + group = parser.add_option_group(group, desc) + add_option = group if group != '' else parser.add_option + + for name in options: + rec = plumber.get_option_by_name(name) + if rec.level < rec.HIGH: + option_recommendation_to_cli_option(add_option, rec) + + + + +def main(args=sys.argv): + log = Log() + parser = OptionParser(usage=USAGE) + fargs = parser.parse_args(args)[1] + + input, output = check_command_line_options(parser, fargs, log) + + from calibre.ebooks.conversion.plumber import Plumber + + plumber = Plumber(input, output, log) + add_input_output_options(parser, plumber) + add_pipeline_options(parser, plumber) + + opts = parser.parse_args(args)[0] + recommendations = [(n.dest, getattr(opts, n.dest)) \ + for n in parser.options_iter()] + + plumber.merge_ui_recommendations(recommendations) + + return 0 + +if __name__ == '__main__': + sys.exit(main()) \ No newline at end of file diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index ac7490bd39..742653251d 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -3,11 +3,15 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import os from calibre.customize.conversion import OptionRecommendation -from calibre.customize.ui import input_profiles +from calibre.customize.ui import input_profiles, output_profiles, \ + plugin_for_input_format, plugin_for_output_format -pipeline_options = [ +class Plumber(object): + + pipeline_options = [ OptionRecommendation(name='verbose', recommended_value=0, level=OptionRecommendation.LOW, @@ -16,7 +20,6 @@ OptionRecommendation(name='verbose', 'verbosity.') ), - OptionRecommendation(name='input_profile', recommended_value='default', level=OptionRecommendation.LOW, choices=[x.short_name for x in 
input_profiles()], @@ -27,4 +30,66 @@ OptionRecommendation(name='input_profile', 'pixels).') ), -] \ No newline at end of file +OptionRecommendation(name='output_profile', + recommended_value='default', level=OptionRecommendation.LOW, + choices=[x.short_name for x in output_profiles()], + help=_('Specify the output profile. The output profile ' + 'tells the conversion system how to optimize the ' + 'created document for the specified device. In some cases, ' + 'an output profile is required to produce documents that ' + 'will work on a device. For example EPUB on the SONY reader.' + ) + ), + +] + + def __init__(self, input, output, log): + self.input = input + self.output = output + self.log = log + + input_fmt = os.path.splitext(input)[1] + if not input_fmt: + raise ValueError('Input file must have and extension') + input_fmt = input_fmt[1:].lower() + + output_fmt = os.path.splitext(input)[1] + if not output_fmt: + output_fmt = '.oeb' + output_fmt = output_fmt[1:].lower() + + self.input_plugin = plugin_for_input_format(input_fmt) + self.output_plugin = plugin_for_output_format(output_fmt) + + if self.input_plugin is None: + raise ValueError('No plugin to handle input format: '+input_fmt) + + if self.output_plugin is None: + raise ValueError('No plugin to handle output format: '+output_fmt) + + self.input_fmt = input_fmt + self.output_fmt = output_fmt + + self.input_options = self.input_plugin.options.union( + self.input_plugin.common_options) + self.output_options = self.output_plugin.options.union( + self.output_plugin.common_options) + + self.merge_plugin_recommendations() + + def get_option_by_name(self, name): + for group in (self.input_options, self.pipeline_options, + self.output_options): + for rec in group: + if rec.option == name: + return rec + + def merge_plugin_recommendations(self): + pass + + def merge_ui_recommendations(self, recommendations): + pass + + + + \ No newline at end of file diff --git a/src/calibre/ebooks/html.py 
b/src/calibre/ebooks/html.py index 710b544007..191d552709 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -19,11 +19,10 @@ from lxml.html import HtmlElementClassLookup, HTMLParser as _HTMLParser, \ from lxml.etree import XPath get_text = XPath("//text()") -from calibre import LoggingInterface, unicode_path, entity_to_unicode +from calibre import unicode_path, entity_to_unicode from calibre.ebooks.chardet import xml_to_unicode, ENCODING_PATS from calibre.utils.config import Config, StringConfig from calibre.ebooks.metadata import MetaInformation -from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.opf2 import OPF, OPFCreator from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.utils.zipfile import ZipFile @@ -401,7 +400,7 @@ class PreProcessor(object): html = rule[0].sub(rule[1], html) return html -class Parser(PreProcessor, LoggingInterface): +class Parser(PreProcessor): # SELF_CLOSING_TAGS = 'hr|br|link|img|meta|input|area|base|basefont' # SELF_CLOSING_RULES = [re.compile(p[0]%SELF_CLOSING_TAGS, re.IGNORECASE) for p in # [ @@ -412,7 +411,6 @@ class Parser(PreProcessor, LoggingInterface): # ] def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, name='htmlparser'): - LoggingInterface.__init__(self, logging.getLogger(name)) self.setup_cli_handler(opts.verbose) self.htmlfile = htmlfile self.opts = opts @@ -1038,6 +1036,7 @@ def merge_metadata(htmlfile, opf, opts): if opf: mi = MetaInformation(opf) elif htmlfile: + from calibre.ebooks.metadata.meta import get_metadata try: mi = get_metadata(open(htmlfile, 'rb'), 'html') except: diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py index 1ce9950677..fa56b5c6b4 100644 --- a/src/calibre/ebooks/mobi/input.py +++ b/src/calibre/ebooks/mobi/input.py @@ -3,8 +3,6 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os - from 
calibre.customize.conversion import InputFormatPlugin class MOBIInput(InputFormatPlugin): @@ -18,12 +16,11 @@ class MOBIInput(InputFormatPlugin): from calibre.ebooks.mobi.reader import MobiReader mr = MobiReader(stream, log, options.input_encoding, options.debug_input) - mr.extract_content(output_dir=os.getcwdu(), parse_cache) + mr.extract_content('.', parse_cache) raw = parse_cache.get('calibre_raw_mobi_markup', False) if raw: if isinstance(raw, unicode): raw = raw.encode('utf-8') open('debug-raw.html', 'wb').write(raw) - return mr.created_opf_path - + return mr.created_opf_path \ No newline at end of file diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/oeb/output.py new file mode 100644 index 0000000000..0a74f488cf --- /dev/null +++ b/src/calibre/ebooks/oeb/output.py @@ -0,0 +1,17 @@ +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from calibre.customize.conversion import OutputFormatPlugin + +class OEBOutput(OutputFormatPlugin): + + name = 'OEB Output' + author = 'Kovid Goyal' + file_type = 'oeb' + + + def convert(self, oeb_book, input_plugin, options, parse_cache, log): + pass + diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 427b41ca5f..ae6cb10818 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -18,6 +18,7 @@ entry_points = { 'console_scripts': [ \ 'ebook-device = calibre.devices.prs500.cli.main:main', 'ebook-meta = calibre.ebooks.metadata.cli:main', + 'ebook-convert = calibre.ebooks.convert.cli:main', 'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main', 'html2lrf = calibre.ebooks.lrf.html.convert_from:main', 'html2oeb = calibre.ebooks.html:main', diff --git a/src/calibre/utils/logging.py b/src/calibre/utils/logging.py index ae2e1a792b..d5a55ac48b 100644 --- a/src/calibre/utils/logging.py +++ b/src/calibre/utils/logging.py @@ -13,13 +13,25 @@ ERROR = 3 import sys, traceback from functools import partial -from calibre 
import prints -from calibre.utils.terminfo import TerminalController -class ANSIStream: + + +class Stream(object): + + def __init__(self, stream): + from calibre import prints + self._prints = prints + self.stream = stream + + def flush(self): + self.stream.flush() + + +class ANSIStream(Stream): def __init__(self, stream=sys.stdout): - self.stream = stream + Stream.__init__(self, stream) + from calibre.utils.terminfo import TerminalController tc = TerminalController(stream) self.color = { DEBUG: tc.GREEN, @@ -32,16 +44,16 @@ class ANSIStream: def prints(self, level, *args, **kwargs): self.stream.write(self.color[level]) kwargs['file'] = self.stream - prints(*args, **kwargs) + self._prints(*args, **kwargs) self.stream.write(self.normal) def flush(self): self.stream.flush() -class HTMLStream: +class HTMLStream(Stream): def __init__(self, stream=sys.stdout): - self.stream = stream + Stream.__init__(self, stream) self.color = { DEBUG: '', INFO:'', @@ -53,7 +65,7 @@ class HTMLStream: def prints(self, level, *args, **kwargs): self.stream.write(self.color[level]) kwargs['file'] = self.stream - prints(*args, **kwargs) + self._prints(*args, **kwargs) self.stream.write(self.normal) def flush(self): From 3ea639199653b6552bd019f0b15aa64686088d15 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Mar 2009 14:49:43 -0700 Subject: [PATCH 07/58] pluginize installs again. 
Also working framework for ebook-convert --- src/calibre/customize/builtins.py | 4 +- src/calibre/customize/conversion.py | 2 +- src/calibre/ebooks/conversion/cli.py | 69 ++++++++++++--------- src/calibre/ebooks/conversion/plumber.py | 4 +- src/calibre/ebooks/epub/from_any.py | 2 +- src/calibre/ebooks/epub/pages.py | 2 +- src/calibre/ebooks/epub/split.py | 5 +- src/calibre/ebooks/lrf/html/convert_from.py | 5 +- src/calibre/ebooks/mobi/writer.py | 2 +- src/calibre/ebooks/oeb/base.py | 9 --- src/calibre/library/database2.py | 2 +- src/calibre/linux.py | 48 ++------------ src/calibre/web/feeds/news.py | 5 +- src/calibre/web/fetch/simple.py | 5 +- 14 files changed, 60 insertions(+), 104 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index ca21bbb215..b6a6141612 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -245,11 +245,11 @@ class MOBIMetadataWriter(MetadataWriterPlugin): from calibre.ebooks.epub.input import EPUBInput from calibre.ebooks.mobi.input import MOBIInput from calibre.ebooks.oeb.output import OEBOutput -from calibre.customize.profiles import input_profiles +from calibre.customize.profiles import input_profiles, output_profiles plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ x.__name__.endswith('MetadataReader')] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ x.__name__.endswith('MetadataWriter')] -plugins += input_profiles \ No newline at end of file +plugins += input_profiles + output_profiles \ No newline at end of file diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index f20cc4ae85..10e5a44ddd 100644 --- a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -24,7 +24,7 @@ class ConversionOption(object): self.choices = choices if self.long_switch is None: - self.long_switch = 
'--'+self.name.replace('_', '-') + self.long_switch = self.name.replace('_', '-') self.validate_parameters() diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 174fa87a5d..83bcb453e9 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -12,28 +12,29 @@ input_file output_file [options] Convert an ebook from one format to another. -input_file is the input and output_file is the output. Both must be +input_file is the input and output_file is the output. Both must be \ specified as the first two arguments to the command. -The output ebook format is guessed from the file extension of -output_file. output_file can also be of the special format .EXT where -EXT is the output file extension. In this case, the name of the output -file is derived the name of the input file. Note that the filenames must -not start with a hyphen. Finally, if output_file has no extension, then -it is treated as a directory and an "open ebook" (OEB) consisting of HTML files -is written to that directory. These files are the files that would normally -have been passed to the output plugin. +The output ebook format is guessed from the file extension of \ +output_file. output_file can also be of the special format .EXT where \ +EXT is the output file extension. In this case, the name of the output \ +file is derived the name of the input file. Note that the filenames must \ +not start with a hyphen. Finally, if output_file has no extension, then \ +it is treated as a directory and an "open ebook" (OEB) consisting of HTML \ +files is written to that directory. These files are the files that would \ +normally have been passed to the output plugin. - -After specifying the input -and output file you can customize the conversion by specifying various -options, listed below. +After specifying the input \ +and output file you can customize the conversion by specifying various \ +options. 
the available options depend on the input and output file types. \ +To get help on them specify the input and output file and then use the -h \ +option. For full documentation of the conversion system see - ''') + 'http://calibre.kovidgoyal.net/user_manual/conversion.html' import sys, os +from optparse import OptionGroup, Option from calibre.utils.config import OptionParser from calibre.utils.logging import Log @@ -68,10 +69,11 @@ def check_command_line_options(parser, args, log): def option_recommendation_to_cli_option(add_option, rec): opt = rec.option - switches = [opt.short_switch] if opt.short_switch else [] - switches.append(opt.long_switch) - add_option(opt.name, switches=switches, help=opt.help, + switches = ['-'+opt.short_switch] if opt.short_switch else [] + switches.append('--'+opt.long_switch) + attrs = dict(dest=opt.name, help=opt.help, choices=opt.choices, default=rec.recommended_value) + add_option(Option(*switches, **attrs)) def add_input_output_options(parser, plumber): input_options, output_options = \ @@ -82,12 +84,18 @@ def add_input_output_options(parser, plumber): option_recommendation_to_cli_option(group, opt) if input_options: - io = parser.add_group(plumber.input_fmt.upper() + ' ' + _('OPTIONS')) - add_options(io, input_options) + title = plumber.input_fmt.upper() + ' ' + _('OPTIONS') + io = OptionGroup(parser, title, _('Options to control the processing' + ' of the input file')) + add_options(io.add_option, input_options) + parser.add_option_group(io) if output_options: - oo = parser.add_group(plumber.output_fmt.upper() + ' ' + _('OPTIONS')) - add_options(oo, output_options) + title = plumber.output_fmt.upper() + ' ' + _('OPTIONS') + oo = OptionGroup(parser, title, _('Options to control the processing' + ' of the output file')) + add_options(oo.add_option, output_options) + parser.add_option_group(oo) def add_pipeline_options(parser, plumber): groups = { @@ -106,27 +114,28 @@ def add_pipeline_options(parser, plumber): } + group_order = 
['', 'DEBUG'] - for group, spec in groups.items(): - desc, options = spec + for group in group_order: + desc, options = groups[group] if group: - group = parser.add_option_group(group, desc) - add_option = group if group != '' else parser.add_option + group = OptionGroup(parser, group, desc) + parser.add_option_group(group) + add_option = group.add_option if group != '' else parser.add_option for name in options: rec = plumber.get_option_by_name(name) if rec.level < rec.HIGH: option_recommendation_to_cli_option(add_option, rec) - - - def main(args=sys.argv): log = Log() parser = OptionParser(usage=USAGE) - fargs = parser.parse_args(args)[1] + if len(args) < 3: + print_help(parser, log) + return 1 - input, output = check_command_line_options(parser, fargs, log) + input, output = check_command_line_options(parser, args, log) from calibre.ebooks.conversion.plumber import Plumber diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 742653251d..bd4d365af8 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -50,10 +50,10 @@ OptionRecommendation(name='output_profile', input_fmt = os.path.splitext(input)[1] if not input_fmt: - raise ValueError('Input file must have and extension') + raise ValueError('Input file must have an extension') input_fmt = input_fmt[1:].lower() - output_fmt = os.path.splitext(input)[1] + output_fmt = os.path.splitext(output)[1] if not output_fmt: output_fmt = '.oeb' output_fmt = output_fmt[1:].lower() diff --git a/src/calibre/ebooks/epub/from_any.py b/src/calibre/ebooks/epub/from_any.py index 9a8e251108..b3e5281525 100644 --- a/src/calibre/ebooks/epub/from_any.py +++ b/src/calibre/ebooks/epub/from_any.py @@ -12,7 +12,7 @@ from contextlib import nested from calibre import extract, walk from calibre.ebooks import DRMError -from calibre.ebooks.epub import config as common_config, process_encryption +from calibre.ebooks.epub import config as 
common_config from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index from calibre.ptempfile import TemporaryDirectory from calibre.ebooks.metadata import MetaInformation diff --git a/src/calibre/ebooks/epub/pages.py b/src/calibre/ebooks/epub/pages.py index 1ab5edde86..4737107a6c 100644 --- a/src/calibre/ebooks/epub/pages.py +++ b/src/calibre/ebooks/epub/pages.py @@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en' import os, re from itertools import count, chain from calibre.ebooks.oeb.base import XHTML, XHTML_NS -from calibre.ebooks.oeb.base import OEBBook, DirWriter +from calibre.ebooks.oeb.base import OEBBook from lxml import etree, html from lxml.etree import XPath diff --git a/src/calibre/ebooks/epub/split.py b/src/calibre/ebooks/epub/split.py index 9814c40df5..c3099c1682 100644 --- a/src/calibre/ebooks/epub/split.py +++ b/src/calibre/ebooks/epub/split.py @@ -15,7 +15,7 @@ from lxml.cssselect import CSSSelector from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.epub import tostring, rules -from calibre import CurrentDir, LoggingInterface +from calibre import CurrentDir XPath = functools.partial(_XPath, namespaces={'re':'http://exslt.org/regular-expressions'}) content = functools.partial(os.path.join, 'content') @@ -32,10 +32,9 @@ class SplitError(ValueError): -class Splitter(LoggingInterface): +class Splitter(object): def __init__(self, path, opts, stylesheet_map, opf): - LoggingInterface.__init__(self, logging.getLogger('htmlsplit')) self.setup_cli_handler(opts.verbose) self.path = path self.always_remove = not opts.preserve_tag_structure or \ diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 2bd63d1d8f..056666b301 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -31,7 +31,7 @@ from calibre.ebooks.lrf import option_parser as lrf_option_parser from calibre.ebooks import ConversionError from 
calibre.ebooks.lrf.html.table import Table from calibre import filename_to_utf8, setup_cli_handlers, __appname__, \ - fit_image, LoggingInterface, preferred_encoding + fit_image, preferred_encoding from calibre.ptempfile import PersistentTemporaryFile from calibre.devices.interface import Device from calibre.ebooks.lrf.html.color_map import lrs_color @@ -78,7 +78,7 @@ def tag_regex(tagname): return dict(open=r'(?:<\s*%(t)s\s+[^<>]*?>|<\s*%(t)s\s*>)'%dict(t=tagname), \ close=r''%dict(t=tagname)) -class HTMLConverter(object, LoggingInterface): +class HTMLConverter(object): SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE) IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction) @@ -209,7 +209,6 @@ class HTMLConverter(object, LoggingInterface): ''' # Defaults for various formatting tags object.__setattr__(self, 'options', options) - LoggingInterface.__init__(self, logger) self.fonts = fonts #: dict specifying font families to use # Memory self.scaled_images = {} #: Temporary files with scaled version of images diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index fdabfaa618..86224488c0 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -24,7 +24,7 @@ from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \ OEB_RASTER_IMAGES from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname from calibre.ebooks.oeb.base import urlnormalize -from calibre.ebooks.oeb.base import Logger, OEBBook +from calibre.ebooks.oeb.base import OEBBook from calibre.ebooks.oeb.profile import Context from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 2e160d1571..f7c472320e 100644 --- 
a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -15,7 +15,6 @@ from urlparse import urldefrag, urlparse, urlunparse from urllib import unquote as urlunquote from lxml import etree, html import calibre -from calibre import LoggingInterface from calibre.translations.dynamic import translate from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.oeb.entitydefs import ENTITYDEFS @@ -212,14 +211,6 @@ class FauxLogger(object): def __call__(self, message): print message -class Logger(LoggingInterface, object): - """A logging object which provides both the standard `logging.Logger` and - calibre-specific interfaces. - """ - def __getattr__(self, name): - return object.__getattribute__(self, 'log_' + name) - - class NullContainer(object): """An empty container. diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index f8b63f1124..cb823e6c73 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -15,7 +15,7 @@ from PyQt4.QtCore import QCoreApplication, QThread, QReadWriteLock from PyQt4.QtGui import QApplication, QImage __app = None -from calibre.library import title_sort +from calibre.ebooks.metadata import title_sort from calibre.library.database import LibraryDatabase from calibre.library.sqlite import connect, IntegrityError from calibre.utils.search_query_parser import SearchQueryParser diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 369dfa3d2c..e08222ed3a 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -1,9 +1,8 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' Post installation script for linux ''' -import sys, os, re, shutil +import sys, os, shutil from subprocess import check_call, call -from tempfile import NamedTemporaryFile from calibre import __version__, __appname__ from calibre.devices import devices @@ -18,16 +17,8 @@ entry_points = { 'console_scripts': [ \ 'ebook-device = calibre.devices.prs500.cli.main:main', 
'ebook-meta = calibre.ebooks.metadata.cli:main', - 'ebook-convert = calibre.ebooks.convert.cli:main', - 'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main', - 'html2lrf = calibre.ebooks.lrf.html.convert_from:main', - 'html2oeb = calibre.ebooks.html:main', - 'html2epub = calibre.ebooks.epub.from_html:main', - 'odt2oeb = calibre.ebooks.odt.to_oeb:main', + 'ebook-convert = calibre.ebooks.conversion.cli:main', 'markdown-calibre = calibre.ebooks.markdown.markdown:main', - 'lit2lrf = calibre.ebooks.lrf.lit.convert_from:main', - 'epub2lrf = calibre.ebooks.lrf.epub.convert_from:main', - 'rtf2lrf = calibre.ebooks.lrf.rtf.convert_from:main', 'web2disk = calibre.web.fetch.simple:main', 'feeds2disk = calibre.web.feeds.main:main', 'calibre-server = calibre.library.server:main', @@ -35,22 +26,10 @@ entry_points = { 'feeds2epub = calibre.ebooks.epub.from_feeds:main', 'feeds2mobi = calibre.ebooks.mobi.from_feeds:main', 'web2lrf = calibre.ebooks.lrf.web.convert_from:main', - 'pdf2lrf = calibre.ebooks.lrf.pdf.convert_from:main', - 'mobi2lrf = calibre.ebooks.lrf.mobi.convert_from:main', - 'fb22lrf = calibre.ebooks.lrf.fb2.convert_from:main', - 'any2lrf = calibre.ebooks.lrf.any.convert_from:main', - 'any2epub = calibre.ebooks.epub.from_any:main', - 'any2lit = calibre.ebooks.lit.from_any:main', - 'any2mobi = calibre.ebooks.mobi.from_any:main', 'lrf2lrs = calibre.ebooks.lrf.lrfparser:main', 'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main', - 'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main', 'isbndb = calibre.ebooks.metadata.isbndb:main', 'librarything = calibre.ebooks.metadata.library_thing:main', - 'mobi2oeb = calibre.ebooks.mobi.reader:main', - 'oeb2mobi = calibre.ebooks.mobi.writer:main', - 'lit2oeb = calibre.ebooks.lit.reader:main', - 'oeb2lit = calibre.ebooks.lit.writer:main', 'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main', 'comic2epub = calibre.ebooks.epub.from_comic:main', 'comic2mobi = calibre.ebooks.mobi.from_comic:main', @@ -61,7 +40,6 @@ entry_points = { 
'calibre-parallel = calibre.parallel:main', 'calibre-customize = calibre.customize.ui:main', 'pdftrim = calibre.ebooks.pdf.pdftrim:main' , - 'any2pdf = calibre.ebooks.pdf.from_any:main', ], 'gui_scripts' : [ __appname__+' = calibre.gui2.main:main', @@ -172,25 +150,16 @@ def setup_completion(fatal_errors): from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop - from calibre.ebooks.mobi.reader import option_parser as mobioeb - from calibre.ebooks.lit.reader import option_parser as lit2oeb from calibre.web.feeds.main import option_parser as feeds2disk from calibre.web.feeds.recipes import titles as feed_titles from calibre.ebooks.lrf.feeds.convert_from import option_parser as feeds2lrf from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop - from calibre.ebooks.epub.from_html import option_parser as html2epub - from calibre.ebooks.html import option_parser as html2oeb - from calibre.ebooks.odt.to_oeb import option_parser as odt2oeb from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub from calibre.ebooks.mobi.from_feeds import option_parser as feeds2mobi - from calibre.ebooks.epub.from_any import option_parser as any2epub - from calibre.ebooks.lit.from_any import option_parser as any2lit from calibre.ebooks.epub.from_comic import option_parser as comic2epub - from calibre.ebooks.mobi.from_any import option_parser as any2mobi - from calibre.ebooks.mobi.writer import option_parser as oeb2mobi - from calibre.gui2.main import option_parser as guiop + from calibre.gui2.main import option_parser as guiop any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', - 'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt'] + 'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt'] f = open_file('/etc/bash_completion.d/libprs500') f.close() os.remove(f.name) @@ -210,16 +179,10 @@ 
def setup_completion(fatal_errors): f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf'])) f.write(opts_and_exts('any2lrf', htmlop, any_formats)) f.write(opts_and_exts('calibre', guiop, any_formats)) - f.write(opts_and_exts('any2epub', any2epub, any_formats)) - f.write(opts_and_exts('any2lit', any2lit, any_formats)) - f.write(opts_and_exts('any2mobi', any2mobi, any_formats)) - f.write(opts_and_exts('oeb2mobi', oeb2mobi, ['opf'])) f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf'])) f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes()))) f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf'])) f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf'])) - f.write(opts_and_exts('mobi2oeb', mobioeb, ['mobi', 'prc'])) - f.write(opts_and_exts('lit2oeb', lit2oeb, ['lit'])) f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr'])) f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr'])) f.write(opts_and_exts('comic2mobi', comic2epub, ['cbz', 'cbr'])) @@ -228,9 +191,6 @@ def setup_completion(fatal_errors): f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles)) f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles)) f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles)) - f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf'])) - f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml'])) - f.write(opts_and_exts('odt2oeb', odt2oeb, ['odt'])) f.write(''' _prs500_ls() { diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 4773d551c3..7d61cead5b 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -17,7 +17,7 @@ from PyQt4.Qt import QApplication, QFile, Qt, QPalette, QSize, QImage, QPainter, from PyQt4.QtWebKit import QWebPage -from calibre import browser, __appname__, iswindows, LoggingInterface, \ +from calibre import browser, __appname__, iswindows, \ strftime, __version__, preferred_encoding from calibre.ebooks.BeautifulSoup import 
BeautifulSoup, NavigableString, CData, Tag from calibre.ebooks.metadata.opf2 import OPFCreator @@ -32,7 +32,7 @@ from calibre.ptempfile import PersistentTemporaryFile from calibre.gui2 import images_rc # Needed for default cover -class BasicNewsRecipe(object, LoggingInterface): +class BasicNewsRecipe(object): ''' Abstract base class that contains logic needed in all feed fetchers. ''' @@ -444,7 +444,6 @@ class BasicNewsRecipe(object, LoggingInterface): :param parser: Command line option parser. Used to intelligently merge options. :param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional. ''' - LoggingInterface.__init__(self, logging.getLogger('feeds2disk')) if not isinstance(self.title, unicode): self.title = unicode(self.title, 'utf-8', 'replace') diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index 4da3f4019c..51a4554a50 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -15,7 +15,7 @@ from PIL import Image from cStringIO import StringIO from calibre import setup_cli_handlers, browser, sanitize_file_name, \ - relpath, LoggingInterface, unicode_path + relpath, unicode_path from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.chardet import xml_to_unicode from calibre.utils.config import OptionParser @@ -80,7 +80,7 @@ class DummyLock(object): def __enter__(self, *args): return self def __exit__(self, *args): pass -class RecursiveFetcher(object, LoggingInterface): +class RecursiveFetcher(object): LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in ('.exe\s*$', '.mp3\s*$', '.ogg\s*$', '^\s*mailto:', '^\s*$')) #ADBLOCK_FILTER = tuple(re.compile(i, re.IGNORECASE) for it in @@ -93,7 +93,6 @@ class RecursiveFetcher(object, LoggingInterface): DUMMY_LOCK = DummyLock() def __init__(self, options, logger, image_map={}, css_map={}, job_info=None): - LoggingInterface.__init__(self, 
logger) self.base_dir = os.path.abspath(os.path.expanduser(options.dir)) if not os.path.exists(self.base_dir): os.makedirs(self.base_dir) From 06f67d6cde7c267e1862c9f69feb1a3bd0859e99 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Mar 2009 21:41:56 -0700 Subject: [PATCH 08/58] Fix #2003 (lrs2lrf mistranslates XML entities) --- src/calibre/ebooks/lrf/lrs/convert_from.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/lrf/lrs/convert_from.py b/src/calibre/ebooks/lrf/lrs/convert_from.py index 495d9adb50..86a97aa70b 100644 --- a/src/calibre/ebooks/lrf/lrs/convert_from.py +++ b/src/calibre/ebooks/lrf/lrs/convert_from.py @@ -28,8 +28,9 @@ class LrsParser(object): def __init__(self, stream, logger): self.logger = logger src = stream.read() - self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0], - selfClosingTags=self.SELF_CLOSING_TAGS) + self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0], + convertEntities=BeautifulStoneSoup.XML_ENTITIES, + selfClosingTags=self.SELF_CLOSING_TAGS) self.objects = {} for obj in self.soup.findAll(objid=True): self.objects[obj['objid']] = obj From 8dcb895f79e74fecc4ec8b0245db21020fccd5cf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Mar 2009 21:47:48 -0700 Subject: [PATCH 09/58] Fix #2012 (epub output problems on PRS505) --- src/calibre/web/feeds/recipes/recipe_usatoday.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/web/feeds/recipes/recipe_usatoday.py b/src/calibre/web/feeds/recipes/recipe_usatoday.py index dd14cd01b4..ced927e721 100644 --- a/src/calibre/web/feeds/recipes/recipe_usatoday.py +++ b/src/calibre/web/feeds/recipes/recipe_usatoday.py @@ -45,3 +45,7 @@ class USAToday(BasicNewsRecipe): def print_version(self, url): return 'http://www.printthis.clickability.com/pt/printThis?clickMap=printThis&fb=Y&url=' + url + def postprocess_html(self, soup, first_fetch): + for t in soup.findAll(['table', 'tr', 'td']): + t.name = 'div' + return soup \ No newline 
at end of file From 4df17ef0326ce21c9893c75b9d3281abcaed992b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Mar 2009 22:10:05 -0700 Subject: [PATCH 10/58] Fix the Opinion section of the no subscription NYTimes recipe --- src/calibre/web/feeds/recipes/recipe_nytimes.py | 6 +++++- src/calibre/web/feeds/recipes/recipe_nytimes_sub.py | 4 +++- src/calibre/web/feeds/recipes/recipe_usatoday.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/calibre/web/feeds/recipes/recipe_nytimes.py b/src/calibre/web/feeds/recipes/recipe_nytimes.py index 270bd5d499..9276ad667a 100644 --- a/src/calibre/web/feeds/recipes/recipe_nytimes.py +++ b/src/calibre/web/feeds/recipes/recipe_nytimes.py @@ -70,10 +70,14 @@ class NYTimesMobile(BasicNewsRecipe): def find_articles(self, root): for a in root.xpath('//a[@accesskey]'): href = a.get('href') + if href.startswith('http://'): + url = href + else: + url = 'http://mobile.nytimes.com/article' + href[href.find('?'):]+'&single=1', yield { 'title': a.text.strip(), 'date' : '', - 'url' : 'http://mobile.nytimes.com/article' + href[href.find('?'):]+'&single=1', + 'url' : url, 'description': '', } diff --git a/src/calibre/web/feeds/recipes/recipe_nytimes_sub.py b/src/calibre/web/feeds/recipes/recipe_nytimes_sub.py index 4a4286b335..5d91dbae38 100644 --- a/src/calibre/web/feeds/recipes/recipe_nytimes_sub.py +++ b/src/calibre/web/feeds/recipes/recipe_nytimes_sub.py @@ -75,7 +75,9 @@ class NYTimes(BasicNewsRecipe): dict(title=title, url=url, date=pubdate, description=description, content='')) - ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2}) + ans = self.sort_index_by(ans, {'The Front Page':-1, + 'Dining In, Dining Out':1, + 'Obituaries':2}) ans = [(key, articles[key]) for key in ans if articles.has_key(key)] return ans diff --git a/src/calibre/web/feeds/recipes/recipe_usatoday.py b/src/calibre/web/feeds/recipes/recipe_usatoday.py index ced927e721..b69fce42f3 100644 --- 
a/src/calibre/web/feeds/recipes/recipe_usatoday.py +++ b/src/calibre/web/feeds/recipes/recipe_usatoday.py @@ -45,7 +45,7 @@ class USAToday(BasicNewsRecipe): def print_version(self, url): return 'http://www.printthis.clickability.com/pt/printThis?clickMap=printThis&fb=Y&url=' + url - def postprocess_html(self, soup, first_fetch): + def postprocess_html(self, soup, first_fetch): for t in soup.findAll(['table', 'tr', 'td']): t.name = 'div' return soup \ No newline at end of file From 39452cf714d71410a90e914711402bcd19b6df8b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 9 Mar 2009 09:01:27 -0700 Subject: [PATCH 11/58] Implement #1996 (Trimming option in cbr conversion) --- src/calibre/ebooks/lrf/comic/convert_from.py | 6 +- src/calibre/gui2/dialogs/comicconf.ui | 144 ++++++++++--------- 2 files changed, 81 insertions(+), 69 deletions(-) diff --git a/src/calibre/ebooks/lrf/comic/convert_from.py b/src/calibre/ebooks/lrf/comic/convert_from.py index 45254f7b87..50f5e1e72e 100755 --- a/src/calibre/ebooks/lrf/comic/convert_from.py +++ b/src/calibre/ebooks/lrf/comic/convert_from.py @@ -143,7 +143,8 @@ class PageProcessor(list): MagickRotateImage(wand, pw, -90) # 25 percent fuzzy trim? - MagickTrimImage(wand, 25*65535/100) + if not self.opts.disable_trim: + MagickTrimImage(wand, 25*65535/100) MagickSetImagePage(wand, 0,0,0,0) #Clear page after trim, like a "+repage" # Do the Photoshop "Auto Levels" equivalent if not self.opts.dont_normalize: @@ -303,6 +304,9 @@ def config(defaults=None,output_format='lrf'): help=_('Maintain picture aspect ratio. Default is to fill the screen.')) c.add_opt('dont_sharpen', ['-s', '--disable-sharpen'], default=False, help=_('Disable sharpening.')) + c.add_opt('disable_trim', ['--disable-trim'], default=False, + help=_('Disable trimming of comic pages. 
For some comics, ' + 'trimming might remove content as well as borders.')) c.add_opt('landscape', ['-l', '--landscape'], default=False, help=_("Don't split landscape images into two portrait images")) c.add_opt('wide', ['-w', '--wide-aspect'], default=False, diff --git a/src/calibre/gui2/dialogs/comicconf.ui b/src/calibre/gui2/dialogs/comicconf.ui index 36af85764a..acab125d57 100644 --- a/src/calibre/gui2/dialogs/comicconf.ui +++ b/src/calibre/gui2/dialogs/comicconf.ui @@ -1,154 +1,162 @@ - + + Dialog - - + + 0 0 646 - 468 + 503 - + Dialog - - + + :/images/convert.svg:/images/convert.svg - - - - + + + + &Title: - + opt_title - - + + - - - + + + &Author(s): - + opt_author - - + + - - - + + + &Number of Colors: - + opt_colors - - - + + + 8 - + 3200000 - + 8 - - - + + + &Profile: - + opt_profile - - + + - - - + + + Disable &normalize - - - + + + Keep &aspect ratio - - - + + + Disable &Sharpening - - - + + + &Landscape - - - + + + Don't so&rt - - - + + + Qt::Horizontal - + QDialogButtonBox::Cancel|QDialogButtonBox::Ok - - - + + + &Right to left - - - + + + De&speckle - - - + + + &Wide + + + + Disable &Trimming + + + - + @@ -157,11 +165,11 @@ Dialog accept() - + 248 254 - + 157 274 @@ -173,11 +181,11 @@ Dialog reject() - + 316 260 - + 286 274 From a5c2d485f0f3fe71f06c75b496b61102f5c5b464 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 9 Mar 2009 09:44:30 -0700 Subject: [PATCH 12/58] New recipe for EcoGeek by Darko Miletic --- src/calibre/ebooks/metadata/isbndb.py | 3 +- src/calibre/web/feeds/recipes/__init__.py | 2 +- .../web/feeds/recipes/recipe_ecogeek.py | 31 +++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 src/calibre/web/feeds/recipes/recipe_ecogeek.py diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py index 3cf5f92eaf..487a52335b 100644 --- a/src/calibre/ebooks/metadata/isbndb.py +++ b/src/calibre/ebooks/metadata/isbndb.py @@ -112,7 +112,8 @@ key is the account key you generate 
after signing up for a free account from isb default=None, help=_('The title of the book to search for.')) parser.add_option('-p', '--publisher', default=None, dest='publisher', help=_('The publisher of the book to search for.')) - parser.add_option('--verbose', default=False, action='store_true', help=_('Verbose processing')) + parser.add_option('-v', '--verbose', default=False, + action='store_true', help=_('Verbose processing')) return parser diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 8253021c57..b2c18b26a8 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -33,7 +33,7 @@ recipe_modules = ['recipe_' + r for r in ( 'la_republica', 'physics_today', 'chicago_tribune', 'e_novine', 'al_jazeera', 'winsupersite', 'borba', 'courrierinternational', 'lamujerdemivida', 'soldiers', 'theonion', 'news_times', - 'el_universal', 'mediapart', 'wikinews_en', + 'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_ecogeek.py b/src/calibre/web/feeds/recipes/recipe_ecogeek.py new file mode 100644 index 0000000000..7695763295 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_ecogeek.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +EcoGeek.org +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class EcoGeek(BasicNewsRecipe): + title = 'EcoGeek' + __author__ = 'Darko Miletic' + description = 'EcoGeek - Technology for the Environment Blog Feed' + publisher = 'EcoGeek' + language = _('English') + category = 'news, ecology, blog' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = True + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + 
'"\ncomments="' + description + '"\ntags="' + category + '"' + + feeds = [(u'Posts', u'http://feeds2.feedburner.com/EcoGeek')] From df904b2fe08e20ca21d557aa6e813f15034cf640 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 9 Mar 2009 09:47:10 -0700 Subject: [PATCH 13/58] version 0.5.0 --- src/calibre/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/constants.py b/src/calibre/constants.py index aebcb35dc0..913c54bffe 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = 'calibre' -__version__ = '0.4.143' +__version__ = '0.5.0' __author__ = "Kovid Goyal " ''' Various run time constants. From a73b293f8959d4f4210b596aa318dc74481b8729 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 9 Mar 2009 09:49:56 -0700 Subject: [PATCH 14/58] IGN:Tag release From 9edb807322bd5b62f3e9d09037d702eeedfe9d17 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 9 Mar 2009 13:12:48 -0700 Subject: [PATCH 15/58] IGN:... --- src/calibre/manual/faq.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index bb1eb9ba02..c069842e53 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -34,6 +34,8 @@ What formats does |app| support conversion to/from? 
| | | | | | | | ODT | ✔ | ✔ | ✔ | | | | | | | +| | FB2 | ✔ | ✔ | ✔ | +| | | | | | | | HTML | ✔ | ✔ | ✔ | | | | | | | | **Input formats** | CBR | ✔ | ✔ | ✔ | From 6bb46288283b6ec265a09fd474f09684a821a318 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 9 Mar 2009 13:13:51 -0700 Subject: [PATCH 16/58] Commit so I can pull from trunk --- src/calibre/ebooks/conversion/cli.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 83bcb453e9..9a320bc40f 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -127,10 +127,13 @@ def add_pipeline_options(parser, plumber): rec = plumber.get_option_by_name(name) if rec.level < rec.HIGH: option_recommendation_to_cli_option(add_option, rec) - + +def option_parser(): + return OptionParser(usage=USAGE) + def main(args=sys.argv): log = Log() - parser = OptionParser(usage=USAGE) + parser = option_parser() if len(args) < 3: print_help(parser, log) return 1 @@ -147,9 +150,9 @@ def main(args=sys.argv): recommendations = [(n.dest, getattr(opts, n.dest)) \ for n in parser.options_iter()] - plumber.merge_ui_recommendations(recommendations) + plumber.merge_ui_recommendations(recommendations) return 0 if __name__ == '__main__': - sys.exit(main()) \ No newline at end of file + sys.exit(main()) From aedb2cf959cb153b289e6c4b1ca5cda8d0b8fd54 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 9 Mar 2009 18:40:34 -0700 Subject: [PATCH 17/58] Fix #2025 (Many LIT file covers detected sideways) --- src/calibre/ebooks/metadata/lit.py | 16 ++++++++++++---- src/calibre/gui2/dialogs/config.py | 2 +- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py index c38450c64c..2129af76dd 100644 --- a/src/calibre/ebooks/metadata/lit.py +++ b/src/calibre/ebooks/metadata/lit.py @@ -19,15 +19,23 @@ def get_metadata(stream): for item 
in opf.iterguide(): if 'cover' not in item.get('type', '').lower(): continue + ctype = item.get('type') href = item.get('href', '') candidates = [href, href.replace('&', '%26')] for item in litfile.manifest.values(): if item.path in candidates: - covers.append(item.internal) + try: + covers.append((litfile.get_file('/data/'+item.internal), + ctype)) + except: + pass break - covers = [litfile.get_file('/data/' + i) for i in covers] - covers.sort(cmp=lambda x, y:cmp(len(x), len(y))) - mi.cover_data = ('jpg', covers[-1]) + covers.sort(cmp=lambda x, y:cmp(len(x[0]), len(y[0])), reverse=True) + idx = 0 + if len(covers) > 1: + if covers[1][1] == covers[1][0]+'-standard': + idx = 1 + mi.cover_data = ('jpg', covers[idx][0]) return mi def main(args=sys.argv): diff --git a/src/calibre/gui2/dialogs/config.py b/src/calibre/gui2/dialogs/config.py index 5353f24544..9958ce53fa 100644 --- a/src/calibre/gui2/dialogs/config.py +++ b/src/calibre/gui2/dialogs/config.py @@ -196,7 +196,7 @@ class ConfigDialog(QDialog, Ui_Dialog): self.language.addItem(language_codes[lang], QVariant(lang)) else: lang = 'en' - self.language.addItem('English', 'en') + self.language.addItem('English', QVariant('en')) items = [(l, language_codes[l]) for l in translations.keys() \ if l != lang] if lang != 'en': From 8ea72440c8813920f9d077d90c11e0abc37b858d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 10 Mar 2009 10:40:56 -0700 Subject: [PATCH 18/58] New recipe for The Daily Mail UK by RufusA. 
Fix #998 (HTML2LRF and empty headings) --- src/calibre/ebooks/lrf/html/convert_from.py | 4 +++ src/calibre/web/feeds/recipes/__init__.py | 2 +- .../web/feeds/recipes/recipe_daily_mail.py | 33 +++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 src/calibre/web/feeds/recipes/recipe_daily_mail.py diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 2bd63d1d8f..c72bcfbfe5 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -99,6 +99,10 @@ class HTMLConverter(object, LoggingInterface): # Replace common line break patterns with line breaks (re.compile(r'

    ( |\s)*

    ', re.IGNORECASE), lambda m: '
    '), + # Replace empty headers with line breaks + (re.compile(r'( |\s)*', + re.IGNORECASE), lambda m: '
    '), + # Replace entities (re.compile(ur'&(\S+?);'), partial(entity_to_unicode, exceptions=['lt', 'gt', 'amp'])), diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index b2c18b26a8..793d5cf45d 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -33,7 +33,7 @@ recipe_modules = ['recipe_' + r for r in ( 'la_republica', 'physics_today', 'chicago_tribune', 'e_novine', 'al_jazeera', 'winsupersite', 'borba', 'courrierinternational', 'lamujerdemivida', 'soldiers', 'theonion', 'news_times', - 'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', + 'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_daily_mail.py b/src/calibre/web/feeds/recipes/recipe_daily_mail.py new file mode 100644 index 0000000000..c64e328bf2 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_daily_mail.py @@ -0,0 +1,33 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class TheDailyMail(BasicNewsRecipe): + title = u'The Daily Mail' + oldest_article = 2 + language = _('English') + author = 'RufusA' + simultaneous_downloads= 1 + max_articles_per_feed = 50 + + extra_css = 'h1 {text-align: left;}' + + remove_tags = [ dict(name='ul', attrs={'class':'article-icons-links'}) ] + remove_tags_after = dict(name='h3', attrs={'class':'social-links-title'}) + remove_tags_before = dict(name='div', attrs={'id':'content'}) + no_stylesheets = True + + feeds = [ + (u'Home', u'http://www.dailymail.co.uk/home/index.rss'), + (u'News', u'http://www.dailymail.co.uk/news/index.rss'), + (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'), + (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'), + (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'), + (u'Health', u'http://www.dailymail.co.uk/health/index.rss'), + (u'Science and Technology', 
u'http://www.dailymail.co.uk/sciencetech/index.rss'), + (u'Money', u'http://www.dailymail.co.uk/money/index.rss'), + (u'Property', u'http://www.dailymail.co.uk/property/index.rss'), + (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'), + (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss')] + + def print_version(self, url): + main = url.partition('?')[0] + return main + '?printingPage=true' From 72581c6e32f301db75ede4e9f62fb97fd17902ce Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 10 Mar 2009 12:56:47 -0700 Subject: [PATCH 19/58] MOBI Input: Strip <street> and
    <address> tags as ADE refuses to handle them gracefully when converted to EPUB --- src/calibre/ebooks/mobi/reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 3ca1fd6c18..967a68aea8 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -300,7 +300,7 @@ class MobiReader(object): mobi_version = self.book_header.mobi_version for tag in root.iter(etree.Element): if tag.tag in ('country-region', 'place', 'placetype', 'placename', - 'state', 'city'): + 'state', 'city', 'street', 'address'): tag.tag = 'span' for key in tag.attrib.keys(): tag.attrib.pop(key) From 06e5659d7968234f78d11e4c7c6ee98dd73fab77 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 10 Mar 2009 13:01:18 -0700 Subject: [PATCH 20/58] IGN:Better error handling when library is on a removable device that no longer exists --- src/calibre/gui2/main.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 163a9d8bd0..4ecfc08f58 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -1406,7 +1406,15 @@ class Main(MainWindow, Ui_MainWindow): dir = os.path.expanduser('~/Library') self.library_path = os.path.abspath(dir) if not os.path.exists(self.library_path): - os.makedirs(self.library_path) + try: + os.makedirs(self.library_path) + except: + self.library_path = os.path.expanduser('~/Library') + error_dialog(self, _('Invalid library location'), + _('Could not access %s. 
Using %s as the library.')% + (repr(self.library_path), repr(self.library_path)) + ).exec_() + os.makedirs(self.library_path) def read_settings(self): From a52286c594bb4b67d07db232b258e9a9fbb9f800 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 10 Mar 2009 13:02:11 -0700 Subject: [PATCH 21/58] IGN:Fix handling of input files that specify an encoding that python doesn't support --- src/calibre/ebooks/chardet/__init__.py | 3 ++- src/calibre/trac/donations/server.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py index af6d724883..971ac9bc9a 100644 --- a/src/calibre/ebooks/chardet/__init__.py +++ b/src/calibre/ebooks/chardet/__init__.py @@ -99,7 +99,8 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, try: raw = raw.decode(encoding, 'replace') except LookupError: - raw = raw.decode('utf-8', 'replace') + encoding = 'utf-8' + raw = raw.decode(encoding, 'replace') if strip_encoding_pats: raw = strip_encoding_declarations(raw) diff --git a/src/calibre/trac/donations/server.py b/src/calibre/trac/donations/server.py index 8e7a096353..24174db801 100644 --- a/src/calibre/trac/donations/server.py +++ b/src/calibre/trac/donations/server.py @@ -196,7 +196,7 @@ class Server(object): def calculate_month_trend(self, days=31): stats = self.get_slice(date.today()-timedelta(days=days-1), date.today()) - fig = plt.figure(2, (12, 4), 96)#, facecolor, edgecolor, frameon, FigureClass) + fig = plt.figure(2, (10, 4), 96)#, facecolor, edgecolor, frameon, FigureClass) fig.clear() ax = fig.add_subplot(111) x = list(range(days-1, -1, -1)) @@ -216,7 +216,7 @@ Donors per day: %(dpd).2f ad=stats.average_deviation, dpd=len(stats.totals)/float(stats.period.days), ) - text = ax.annotate(text, (0.6, 0.65), textcoords='axes fraction') + text = ax.annotate(text, (0.5, 0.65), textcoords='axes fraction') fig.savefig(self.MONTH_TRENDS) def calculate_trend(self): From 
74486fc40df983a35ca4230a3d26904b0b4cf12b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 10 Mar 2009 19:23:43 -0700 Subject: [PATCH 22/58] EPUB Output: Strip <form>
    tags since ADE runs screaming when it sees one. Fixes #2029 (IHT resetting P505) --- src/calibre/ebooks/epub/from_html.py | 3 +++ src/calibre/web/feeds/__init__.py | 3 +-- src/calibre/web/feeds/news.py | 3 ++- src/calibre/web/feeds/recipes/recipe_iht.py | 8 +++++++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index ffe402538f..47d278a2b6 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -197,6 +197,9 @@ class HTMLProcessor(Processor, Rationalizer): if not tag.text and not tag.get('src', False): tag.getparent().remove(tag) + for tag in self.root.xpath('//form'): + tag.getparent().remove(tag) + if self.opts.linearize_tables: for tag in self.root.xpath('//table | //tr | //th | //td'): tag.tag = 'div' diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index 3f0ec414a2..4a0f6b47f7 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -98,7 +98,7 @@ class Feed(object): if len(self.articles) >= max_articles_per_feed: break self.parse_article(item) - + def populate_from_preparsed_feed(self, title, articles, oldest_article=7, max_articles_per_feed=100): @@ -156,7 +156,6 @@ class Feed(object): content = None if not link and not content: return - article = Article(id, title, link, description, published, content) delta = datetime.utcnow() - article.utctime if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 4773d551c3..13a79201e2 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1012,7 +1012,8 @@ class BasicNewsRecipe(object, LoggingInterface): feed.description = unicode(err) parsed_feeds.append(feed) self.log_exception(msg) - + + return parsed_feeds @classmethod diff --git a/src/calibre/web/feeds/recipes/recipe_iht.py 
b/src/calibre/web/feeds/recipes/recipe_iht.py index c30be70dea..1bee27d061 100644 --- a/src/calibre/web/feeds/recipes/recipe_iht.py +++ b/src/calibre/web/feeds/recipes/recipe_iht.py @@ -3,6 +3,7 @@ __copyright__ = '2008, Derry FitzGerald' ''' iht.com ''' +import re from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile @@ -16,7 +17,12 @@ class InternationalHeraldTribune(BasicNewsRecipe): max_articles_per_feed = 10 no_stylesheets = True - remove_tags = [dict(name='div', attrs={'class':'footer'})] + remove_tags = [dict(name='div', attrs={'class':'footer'}), + dict(name=['form'])] + preprocess_regexps = [ + (re.compile(r'