diff --git a/src/libprs500/__init__.py b/src/libprs500/__init__.py index b7b752de3b..5dab02a138 100644 --- a/src/libprs500/__init__.py +++ b/src/libprs500/__init__.py @@ -23,13 +23,14 @@ from gettext import GNUTranslations from math import floor from optparse import OptionParser as _OptionParser from optparse import IndentedHelpFormatter +from logging import Formatter from ttfquery import findsystem, describe from libprs500.translations.msgfmt import make from libprs500.ebooks.chardet import detect from libprs500.terminfo import TerminalController -terminal_controller = TerminalController() +terminal_controller = TerminalController(sys.stdout) iswindows = 'win32' in sys.platform.lower() or 'win64' in sys.platform.lower() isosx = 'darwin' in sys.platform.lower() @@ -51,6 +52,25 @@ __builtin__.__dict__['_'] = lambda s: s class CommandLineError(Exception): pass +class ColoredFormatter(Formatter): + + def format(self, record): + ln = record.__dict__['levelname'] + col = '' + if ln == 'CRITICAL': + col = terminal_controller.YELLOW + elif ln == 'ERROR': + col = terminal_controller.RED + elif ln in ['WARN', 'WARNING']: + col = terminal_controller.BLUE + elif ln == 'INFO': + col = terminal_controller.GREEN + elif ln == 'DEBUG': + col = terminal_controller.CYAN + record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL + return Formatter.format(self, record) + + def setup_cli_handlers(logger, level): logger.setLevel(level) if level == logging.WARNING: @@ -187,9 +207,9 @@ def extract(path, dir): raise Exception('Unknown archive type') extractor(path, dir) -def browser(): +def browser(honor_time=False): opener = mechanize.Browser() - opener.set_handle_refresh(True) + opener.set_handle_refresh(True, honor_time=honor_time) opener.set_handle_robots(False) opener.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; i686 Linux; en_US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4')] return opener diff --git a/src/libprs500/ebooks/lrf/pylrs/pylrs.py b/src/libprs500/ebooks/lrf/pylrs/pylrs.py index adc8855a0f..45b141d47a 100644 --- a/src/libprs500/ebooks/lrf/pylrs/pylrs.py +++ b/src/libprs500/ebooks/lrf/pylrs/pylrs.py @@ -147,7 +147,7 @@ class Delegator(object): d.parent = self methods = d.getMethods() self.delegatedMethods += methods - for m in methods: + for m in methods: setattr(self, m, getattr(d, m)) """ diff --git a/src/libprs500/ebooks/metadata/opf.py b/src/libprs500/ebooks/metadata/opf.py index e1d078d9a0..833f8ae51f 100644 --- a/src/libprs500/ebooks/metadata/opf.py +++ b/src/libprs500/ebooks/metadata/opf.py @@ -595,6 +595,11 @@ class OPFCreator(OPF): self.uid = mi.uid def create_manifest(self, entries): + ''' + Create + @param entries: List of (URL, mime-type) + @type entries: list of 2-tuples + ''' doc = dom.parseString(self.soup.__str__('UTF-8').strip()) package = doc.documentElement manifest = doc.createElement('manifest') @@ -616,6 +621,11 @@ class OPFCreator(OPF): def create_spine(self, entries): + ''' + Create the element. Must first call L{create_manifest}. + @param: List of paths + @type param: list of strings + ''' doc = dom.parseString(self.soup.__str__('UTF-8').strip()) package = doc.documentElement spine = doc.createElement('spine') diff --git a/src/libprs500/linux.py b/src/libprs500/linux.py index baf5c02467..0d06482aee 100644 --- a/src/libprs500/linux.py +++ b/src/libprs500/linux.py @@ -74,6 +74,34 @@ def options(option_parser): opts.extend(opt._long_opts) return opts +def opts_and_words(name, op, words): + opts = ' '.join(options(op)) + words = [repr(w) for w in words] + words = ' '.join(words) + return '_'+name+'()'+\ +''' +{ + local cur prev opts + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + opts="%s" + words="%s" + + case "${cur}" in + -* ) + COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) + return 0 + ;; + * ) + COMPREPLY=( $(compgen -W "${words}" -- ${cur}) ) + return 0 + ;; + esac + +} +complete -F _'''%(opts, words) + name + ' ' + name +"\n\n" + + def opts_and_exts(name, op, exts): opts = ' '.join(options(op)) exts.extend([i.upper() for i in exts]) @@ -135,6 +163,8 @@ def setup_completion(fatal_errors): from libprs500.gui2.lrf_renderer.main import option_parser as lrfviewerop from libprs500.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop from libprs500.ebooks.mobi.reader import option_parser as mobioeb + from libprs500.web.feeds.main import option_parser as feeds2disk + from libprs500.web.feeds.recipes import titles as feed_titles f = open_file('/etc/bash_completion.d/libprs500') @@ -159,6 +189,7 @@ def setup_completion(fatal_errors): f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf'])) f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf'])) f.write(opts_and_exts('mobi2oeb', mobioeb, ['mobi', 'prc'])) + f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles)) f.write(''' _prs500_ls() { diff --git a/src/libprs500/parallel.py b/src/libprs500/parallel.py index 56724220ea..05fcc1c8cf 100644 --- a/src/libprs500/parallel.py +++ b/src/libprs500/parallel.py @@ -15,7 +15,7 @@ ''' Used to run jobs in parallel in separate processes. ''' -import re, sys, tempfile, os, cPickle, cStringIO, traceback, atexit, binascii, time, subprocess +import re, sys, tempfile, os, cPickle, traceback, atexit, binascii, time, subprocess from functools import partial diff --git a/src/libprs500/terminfo.py b/src/libprs500/terminfo.py index 1114f39642..fca163d988 100644 --- a/src/libprs500/terminfo.py +++ b/src/libprs500/terminfo.py @@ -106,7 +106,7 @@ class TerminalController: except: return # If the stream isn't a tty, then assume it has no capabilities. - if not term_stream.isatty(): return + if not hasattr(term_stream, 'isatty') or not term_stream.isatty(): return # Check the terminal type. If we fail, then assume that the # terminal has no capabilities. diff --git a/src/libprs500/threadpool.py b/src/libprs500/threadpool.py index 917665bf92..19b1a7b038 100644 --- a/src/libprs500/threadpool.py +++ b/src/libprs500/threadpool.py @@ -97,7 +97,8 @@ class WorkerThread(threading.Thread): ) except: request.exception = True - self.resultQueue.put((request, sys.exc_info())) + import traceback + self.resultQueue.put((request, traceback.format_exc())) def dismiss(self): """Sets a flag to tell the thread to exit when done with current job. diff --git a/src/libprs500/web/feeds/__init__.py b/src/libprs500/web/feeds/__init__.py index f54f3c254a..1a3083131d 100644 --- a/src/libprs500/web/feeds/__init__.py +++ b/src/libprs500/web/feeds/__init__.py @@ -27,6 +27,7 @@ class Article(object): time_offset = datetime.now() - datetime.utcnow() def __init__(self, id, title, url, summary, published, content): + self.downloaded = False self.id = id self.title = title self.url = url @@ -103,7 +104,7 @@ class Feed(object): if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: self.articles.append(article) else: - self.logger.debug('Skipping article %s as it is too old.'%title) + self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, article.localtime.strftime('%a, %d %b, %Y %H:%M'), self.title)) def __iter__(self): return iter(self.articles) @@ -118,6 +119,12 @@ class Feed(object): def __str__(self): return repr(self) + + def __bool__(self): + for article in self: + if getattr(article, 'downloaded', False): + return True + return False def feed_from_xml(raw_xml, title=None, oldest_article=7, max_articles_per_feed=100): diff --git a/src/libprs500/web/feeds/main.py b/src/libprs500/web/feeds/main.py index df5ef3c36b..8e2d7d0701 100644 --- a/src/libprs500/web/feeds/main.py +++ b/src/libprs500/web/feeds/main.py @@ -18,21 +18,24 @@ from libprs500.web.feeds.news import BasicNewsRecipe '''''' import sys, os, logging -from libprs500.web.recipes import get_feed, compile_recipe +from libprs500.web.feeds.recipes import get_builtin_recipe, compile_recipe, titles from libprs500.web.fetch.simple import option_parser as _option_parser def option_parser(usage='''\ -%prog [options] ARG +%%prog [options] ARG -%prog parsers an online source of articles, like an RSS or ATOM feed and +%%prog parsers an online source of articles, like an RSS or ATOM feed and fetches the article contents organized in a nice hierarchy. ARG can be one of: -file name - %prog will try to load a recipe from the file -builtin recipe title - %prog will load the builtin recipe and use it to fetch the feed. For e.g. Newsweek or "The BBC" or "The New York Times" -recipe as a string - $prog will load the recipe directly from the string arg. -'''): +file name - %%prog will try to load a recipe from the file +builtin recipe title - %%prog will load the builtin recipe and use it to fetch the feed. For e.g. Newsweek or "The BBC" or "The New York Times" +recipe as a string - %%prog will load the recipe directly from the string arg. + +Available builtin recipes are: +%s +'''%(unicode(list(titles))[1:-1])): p = _option_parser(usage=usage) p.remove_option('--max-recursions') p.remove_option('--base-dir') @@ -86,7 +89,7 @@ def main(args=sys.argv, notification=None, handler=None): else: notification = no_progress_bar - if len(args) != 2: + if len(args) != 2 and opts.feeds is None: p.print_help() return 1 @@ -96,11 +99,16 @@ def main(args=sys.argv, notification=None, handler=None): else: try: if os.access(args[1], os.R_OK): - recipe = compile_recipe(open(args[1]).read()) + try: + recipe = compile_recipe(open(args[1]).read()) + except: + import traceback + traceback.print_exc() + return 1 else: - raise Exception('') + raise Exception('not file') except: - recipe = get_feed(args[1]) + recipe = get_builtin_recipe(args[1]) if recipe is None: recipe = compile_recipe(args[1]) @@ -111,9 +119,10 @@ def main(args=sys.argv, notification=None, handler=None): return 1 if handler is None: + from libprs500 import ColoredFormatter handler = logging.StreamHandler(sys.stdout) handler.setLevel(logging.DEBUG if opts.debug else logging.INFO if opts.verbose else logging.WARN) - handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s')) + handler.setFormatter(ColoredFormatter('%(levelname)s: %(message)s\n')) # The trailing newline is need because of the progress bar logging.getLogger('feeds2disk').addHandler(handler) recipe = recipe(opts, p, notification) diff --git a/src/libprs500/web/feeds/news.py b/src/libprs500/web/feeds/news.py index debc6d8a92..46c5549598 100644 --- a/src/libprs500/web/feeds/news.py +++ b/src/libprs500/web/feeds/news.py @@ -17,11 +17,13 @@ The backend to parse feeds and create HTML that can then be converted to an ebook. ''' -import logging, os, cStringIO, traceback, time +import logging, os, cStringIO, time, itertools, traceback import urlparse -from libprs500 import browser +from libprs500 import browser, __appname__ from libprs500.ebooks.BeautifulSoup import BeautifulSoup +from libprs500.ebooks.metadata.opf import OPFCreator +from libprs500.ebooks.metadata import MetaInformation from libprs500.web.feeds import feed_from_xml, templates from libprs500.web.fetch.simple import option_parser as web2disk_option_parser from libprs500.web.fetch.simple import RecursiveFetcher @@ -35,7 +37,10 @@ class BasicNewsRecipe(object): #: The title to use for the ebook #: @type: string - title = 'Unknown News Source' + title = _('Unknown News Source') + + #: The author of this recipe + __author__ = _('Unknown') #: Maximum number of articles to download from each feed #: @type: integer @@ -55,17 +60,18 @@ class BasicNewsRecipe(object): delay = 0 #: Number of simultaneous downloads. Set to 1 if the server is picky. + #: Automatically reduced to 1 if L{delay} > 0 #: @type: integer simultaneous_downloads = 5 #: Timeout for fetching files from server in seconds #: @type: integer - timeout = 10 + timeout = 120 #: The format string for the date shown on the first page #: By default: Day Name Day Number Month Name Year #: @type: string - timefmt = ' %a, %d %b %Y' + timefmt = ' [%a, %d %b %Y]' #: Max number of characters in the short description. #: @type: integer @@ -102,7 +108,7 @@ class BasicNewsRecipe(object): #: List of options to pass to html2lrf, to customize generation of LRF ebooks. #: @type: list of strings - html2lrf_options = [] + html2lrf_options = ['--page-break-before', '$'] #: List of tags to be removed. Specified tags are removed from downloaded HTML. #: A tag is specified as a dictionary of the form:: @@ -114,9 +120,23 @@ class BasicNewsRecipe(object): #: U{http://www.crummy.com/software/BeautifulSoup/documentation.html#The basic find method: findAll(name, attrs, recursive, text, limit, **kwargs)} #: A common example:: #: remove_tags = [dict(name='div', attrs={'class':'advert'})] - #: This will remove all