From 1663619ceffc4b56cff57ed82835aee3f363c53f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 1 Apr 2013 21:04:52 +0530 Subject: [PATCH 01/22] ... --- resources/default_tweaks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index ff1a53de96..9851d76af4 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -79,7 +79,7 @@ author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council', # By default, calibre splits a string containing multiple author names on # ampersands and the words "and" and "with". You can customize the splitting # by changing the regular expression below. Strings are split on whatever the -# specified regular expression matches. +# specified regular expression matches, in addition to ampersands. # Default: r'(?i),?\s+(and|with)\s+' authors_split_regex = r'(?i),?\s+(and|with)\s+' From 9ba0272b0c1768980c0d5cff8275dbb83d0bcf5b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 1 Apr 2013 22:54:09 +0530 Subject: [PATCH 02/22] Metadata download: Add a plugin to download book covers from a google image search. Go to Preferences->Metadata download and enable the plugin to use it. Google Image search often finds larger and/or different covers from the other sources, however, it sometimes finds junk. Use at your discretion. 
--- src/calibre/customize/builtins.py | 3 +- src/calibre/customize/ui.py | 2 +- src/calibre/ebooks/metadata/sources/amazon.py | 2 +- src/calibre/ebooks/metadata/sources/base.py | 10 +- src/calibre/ebooks/metadata/sources/covers.py | 11 +- src/calibre/ebooks/metadata/sources/douban.py | 2 +- .../ebooks/metadata/sources/edelweiss.py | 2 +- src/calibre/ebooks/metadata/sources/google.py | 2 +- .../ebooks/metadata/sources/google_images.py | 148 ++++++++++++++++++ .../ebooks/metadata/sources/openlibrary.py | 2 +- .../ebooks/metadata/sources/overdrive.py | 2 +- src/calibre/ebooks/metadata/sources/ozon.py | 12 +- src/calibre/ebooks/metadata/sources/worker.py | 8 +- src/calibre/gui2/metadata/single_download.py | 117 +++++++++----- 14 files changed, 263 insertions(+), 60 deletions(-) create mode 100644 src/calibre/ebooks/metadata/sources/google_images.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index e157c36c5e..c87c8c63d0 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -757,8 +757,9 @@ from calibre.ebooks.metadata.sources.isbndb import ISBNDB from calibre.ebooks.metadata.sources.overdrive import OverDrive from calibre.ebooks.metadata.sources.douban import Douban from calibre.ebooks.metadata.sources.ozon import Ozon +from calibre.ebooks.metadata.sources.google_images import GoogleImages -plugins += [GoogleBooks, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon] +plugins += [GoogleBooks, Amazon, Edelweiss, GoogleImages, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon] # }}} diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py index 849d1a21f4..06fd2784e4 100644 --- a/src/calibre/customize/ui.py +++ b/src/calibre/customize/ui.py @@ -91,7 +91,7 @@ def restore_plugin_state_to_default(plugin_or_name): config['enabled_plugins'] = ep default_disabled_plugins = set([ - 'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', + 'Overdrive', 'Douban Books', 'OZON.ru', 
'Edelweiss', 'Google Images', ]) def is_disabled(plugin): diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index a8e15a6d94..3fefe2d886 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -858,7 +858,7 @@ class Amazon(Source): # }}} def download_cover(self, log, result_queue, abort, # {{{ - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: log.info('No cached cover found, running identify') diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index e15d11c3c1..41812af8eb 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -31,7 +31,7 @@ msprefs.defaults['find_first_edition_date'] = False # Google covers are often poor quality (scans/errors) but they have high # resolution, so they trump covers from better sources. So make sure they # are only used if no other covers are found. -msprefs.defaults['cover_priorities'] = {'Google':2} +msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2} def create_log(ostream=None): from calibre.utils.logging import ThreadSafeLog, FileStream @@ -222,6 +222,9 @@ class Source(Plugin): #: plugin config_help_message = None + #: If True this source can return multiple covers for a given query + can_get_multiple_covers = False + def __init__(self, *args, **kwargs): Plugin.__init__(self, *args, **kwargs) @@ -522,7 +525,7 @@ class Source(Plugin): return None def download_cover(self, log, result_queue, abort, - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): ''' Download a cover and put it into result_queue. 
The parameters all have the same meaning as for :meth:`identify`. Put (self, cover_data) into @@ -531,6 +534,9 @@ class Source(Plugin): This method should use cached cover URLs for efficiency whenever possible. When cached data is not present, most plugins simply call identify and use its results. + + If the parameter get_best_cover is True and this plugin can get + multiple covers, it should only get the "best" one. ''' pass diff --git a/src/calibre/ebooks/metadata/sources/covers.py b/src/calibre/ebooks/metadata/sources/covers.py index d28ce146c6..0fe963e3f7 100644 --- a/src/calibre/ebooks/metadata/sources/covers.py +++ b/src/calibre/ebooks/metadata/sources/covers.py @@ -35,9 +35,14 @@ class Worker(Thread): start_time = time.time() if not self.abort.is_set(): try: - self.plugin.download_cover(self.log, self.rq, self.abort, - title=self.title, authors=self.authors, - identifiers=self.identifiers, timeout=self.timeout) + if self.plugin.can_get_multiple_covers: + self.plugin.download_cover(self.log, self.rq, self.abort, + title=self.title, authors=self.authors, get_best_cover=True, + identifiers=self.identifiers, timeout=self.timeout) + else: + self.plugin.download_cover(self.log, self.rq, self.abort, + title=self.title, authors=self.authors, + identifiers=self.identifiers, timeout=self.timeout) except: self.log.exception('Failed to download cover from', self.plugin.name) diff --git a/src/calibre/ebooks/metadata/sources/douban.py b/src/calibre/ebooks/metadata/sources/douban.py index 6857d62d4d..f955fb8a79 100644 --- a/src/calibre/ebooks/metadata/sources/douban.py +++ b/src/calibre/ebooks/metadata/sources/douban.py @@ -221,7 +221,7 @@ class Douban(Source): # }}} def download_cover(self, log, result_queue, abort, # {{{ - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: log.info('No cached cover found, running 
identify') diff --git a/src/calibre/ebooks/metadata/sources/edelweiss.py b/src/calibre/ebooks/metadata/sources/edelweiss.py index c86f16ff0d..53ae6c6ee3 100644 --- a/src/calibre/ebooks/metadata/sources/edelweiss.py +++ b/src/calibre/ebooks/metadata/sources/edelweiss.py @@ -320,7 +320,7 @@ class Edelweiss(Source): # }}} def download_cover(self, log, result_queue, abort, # {{{ - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: log.info('No cached cover found, running identify') diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index 3962afcb5e..c03f20cd6b 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -209,7 +209,7 @@ class GoogleBooks(Source): # }}} def download_cover(self, log, result_queue, abort, # {{{ - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: log.info('No cached cover found, running identify') diff --git a/src/calibre/ebooks/metadata/sources/google_images.py b/src/calibre/ebooks/metadata/sources/google_images.py new file mode 100644 index 0000000000..c755fea192 --- /dev/null +++ b/src/calibre/ebooks/metadata/sources/google_images.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from collections import OrderedDict + +from calibre import as_unicode +from calibre.ebooks.metadata.sources.base import Source, Option + +class GoogleImages(Source): + + name = 'Google Images' + description = _('Downloads covers from 
a Google Image search. Useful to find larger/alternate covers.') + capabilities = frozenset(['cover']) + config_help_message = _('Configure the Google Image Search plugin') + can_get_multiple_covers = True + options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'), + _('The maximum number of covers to process from the google search result')), + Option('size', 'choices', 'svga', _('Cover size'), + _('Search for covers larger than the specified size'), + choices=OrderedDict(( + ('any', _('Any size'),), + ('l', _('Large'),), + ('qsvga', _('Larger than %s')%'400x300',), + ('vga', _('Larger than %s')%'640x480',), + ('svga', _('Larger than %s')%'600x800',), + ('xga', _('Larger than %s')%'1024x768',), + ('2mp', _('Larger than %s')%'2 MP',), + ('4mp', _('Larger than %s')%'4 MP',), + ))), + ) + + def download_cover(self, log, result_queue, abort, + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): + if not title: + return + from threading import Thread + import time + timeout = max(60, timeout) # Needs at least a minute + title = ' '.join(self.get_title_tokens(title)) + author = ' '.join(self.get_author_tokens(authors)) + urls = self.get_image_urls(title, author, log, abort, timeout) + if not urls: + log('No images found in Google for, title: %r and authors: %r'%(title, author)) + return + urls = urls[:self.prefs['max_covers']] + if get_best_cover: + urls = urls[:1] + workers = [Thread(target=self.download_image, args=(url, timeout, log, result_queue)) for url in urls] + for w in workers: + w.daemon = True + w.start() + alive = True + start_time = time.time() + while alive and not abort.is_set() and time.time() - start_time < timeout: + alive = False + for w in workers: + if w.is_alive(): + alive = True + break + abort.wait(0.1) + + def download_image(self, url, timeout, log, result_queue): + try: + ans = self.browser.open_novisit(url, timeout=timeout).read() + result_queue.put((self, ans)) + log('Downloaded cover from: 
%s'%url) + except Exception: + self.log.exception('Failed to download cover from: %r'%url) + + def get_image_urls(self, title, author, log, abort, timeout): + from calibre.utils.ipc.simple_worker import fork_job, WorkerError + try: + return fork_job('calibre.ebooks.metadata.sources.google_images', + 'search', args=(title, author, self.prefs['size'], timeout), no_output=True, abort=abort, timeout=timeout)['result'] + except WorkerError as e: + if e.orig_tb: + log.error(e.orig_tb) + log.exception('Searching google failed:' + as_unicode(e)) + except Exception as e: + log.exception('Searching google failed:' + as_unicode(e)) + + return [] + +USER_AGENT = 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101210 Firefox/3.6.13' + +def find_image_urls(br, ans): + import urlparse + for w in br.page.mainFrame().documentElement().findAll('.images_table a[href]'): + try: + imgurl = urlparse.parse_qs(urlparse.urlparse(unicode(w.attribute('href'))).query)['imgurl'][0] + except: + continue + if imgurl not in ans: + ans.append(imgurl) + +def search(title, author, size, timeout, debug=False): + import time + from calibre.web.jsbrowser.browser import Browser, LoadWatcher, Timeout + ans = [] + start_time = time.time() + br = Browser(user_agent=USER_AGENT, enable_developer_tools=debug) + br.visit('https://www.google.com/advanced_image_search') + f = br.select_form('form[action="/search"]') + f['as_q'] = '%s %s'%(title, author) + if size != 'any': + f['imgsz'] = size + f['imgar'] = 't|xt' + f['as_filetype'] = 'jpg' + br.submit(wait_for_load=False) + + # Loop until the page finishes loading or at least five image urls are + # found + lw = LoadWatcher(br.page, br) + while lw.is_loading and len(ans) < 5: + br.run_for_a_time(0.2) + find_image_urls(br, ans) + if time.time() - start_time > timeout: + raise Timeout('Timed out trying to load google image search page') + find_image_urls(br, ans) + if debug: + br.show_browser() + br.close() + del br # Needed to prevent PyQt from 
segfaulting + return ans + +def test_google(): + import pprint + pprint.pprint(search('heroes', 'abercrombie', 'svga', 60, debug=True)) + +def test(): + from Queue import Queue + from threading import Event + from calibre.utils.logging import default_log + p = GoogleImages(None) + rq = Queue() + p.download_cover(default_log, rq, Event(), title='The Heroes', + authors=('Joe Abercrombie',)) + print ('Downloaded', rq.qsize(), 'covers') + +if __name__ == '__main__': + test() + diff --git a/src/calibre/ebooks/metadata/sources/openlibrary.py b/src/calibre/ebooks/metadata/sources/openlibrary.py index 4645d2a18a..b0eeb940a5 100644 --- a/src/calibre/ebooks/metadata/sources/openlibrary.py +++ b/src/calibre/ebooks/metadata/sources/openlibrary.py @@ -19,7 +19,7 @@ class OpenLibrary(Source): OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false' def download_cover(self, log, result_queue, abort, - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): if 'isbn' not in identifiers: return isbn = identifiers['isbn'] diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py index 6d6ebd3990..b232c7c9a4 100755 --- a/src/calibre/ebooks/metadata/sources/overdrive.py +++ b/src/calibre/ebooks/metadata/sources/overdrive.py @@ -75,7 +75,7 @@ class OverDrive(Source): # }}} def download_cover(self, log, result_queue, abort, # {{{ - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): import mechanize cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: diff --git a/src/calibre/ebooks/metadata/sources/ozon.py b/src/calibre/ebooks/metadata/sources/ozon.py index ebb104818f..0f4b0c2c53 100644 --- a/src/calibre/ebooks/metadata/sources/ozon.py +++ b/src/calibre/ebooks/metadata/sources/ozon.py @@ -55,7 +55,7 @@ class Ozon(Source): # for 
ozon.ru search we have to format ISBN with '-' isbn = _format_isbn(log, identifiers.get('isbn', None)) ozonid = identifiers.get('ozon', None) - + unk = unicode(_('Unknown')).upper() if (title and title != unk) or (authors and authors != [unk]) or isbn or not ozonid: qItems = set([isbn, title]) @@ -64,19 +64,19 @@ class Ozon(Source): qItems.discard(None) qItems.discard('') qItems = map(_quoteString, qItems) - + q = u' '.join(qItems).strip() log.info(u'search string: ' + q) - + if isinstance(q, unicode): q = q.encode('utf-8') if not q: return None - + search_url += quote_plus(q) else: search_url = self.ozon_url + '/webservices/OzonWebSvc.asmx/ItemDetail?ID=%s' % ozonid - + log.debug(u'search url: %r'%search_url) return search_url # }}} @@ -250,7 +250,7 @@ class Ozon(Source): return url # }}} - def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30): # {{{ + def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): # {{{ cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: log.debug('No cached cover found, running identify') diff --git a/src/calibre/ebooks/metadata/sources/worker.py b/src/calibre/ebooks/metadata/sources/worker.py index 48f0f99584..51fb883e7d 100644 --- a/src/calibre/ebooks/metadata/sources/worker.py +++ b/src/calibre/ebooks/metadata/sources/worker.py @@ -11,6 +11,7 @@ import os from threading import Event, Thread from Queue import Queue, Empty from io import BytesIO +from collections import Counter from calibre.utils.date import as_utc from calibre.ebooks.metadata.sources.identify import identify, msprefs @@ -113,13 +114,18 @@ def single_covers(title, authors, identifiers, caches, tdir): kwargs=dict(title=title, authors=authors, identifiers=identifiers)) worker.daemon = True worker.start() + c = Counter() while worker.is_alive(): try: plugin, width, height, fmt, data = results.get(True, 1) except Empty: continue 
else: - name = '%s,,%s,,%s,,%s.cover'%(plugin.name, width, height, fmt) + name = plugin.name + if plugin.can_get_multiple_covers: + name += '{%d}'%c[plugin.name] + c[plugin.name] += 1 + name = '%s,,%s,,%s,,%s.cover'%(name, width, height, fmt) with open(name, 'wb') as f: f.write(data) os.mkdir(name+'.done') diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py index e4a78b674a..ffa83b6ea8 100644 --- a/src/calibre/gui2/metadata/single_download.py +++ b/src/calibre/gui2/metadata/single_download.py @@ -16,13 +16,12 @@ from operator import attrgetter from Queue import Queue, Empty from io import BytesIO -from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt, - QApplication, QDialog, QVBoxLayout, QLabel, - QDialogButtonBox, QStyle, QStackedWidget, QWidget, - QTableView, QGridLayout, QFontInfo, QPalette, QTimer, - pyqtSignal, QAbstractTableModel, QVariant, QSize, - QListView, QPixmap, QAbstractListModel, QColor, QRect, - QTextBrowser, QStringListModel) +from PyQt4.Qt import ( + QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt, QApplication, + QDialog, QVBoxLayout, QLabel, QDialogButtonBox, QStyle, QStackedWidget, + QWidget, QTableView, QGridLayout, QFontInfo, QPalette, QTimer, pyqtSignal, + QAbstractTableModel, QVariant, QSize, QListView, QPixmap, QModelIndex, + QAbstractListModel, QColor, QRect, QTextBrowser, QStringListModel) from PyQt4.QtWebKit import QWebView from calibre.customize.ui import metadata_plugins @@ -654,7 +653,7 @@ class CoversModel(QAbstractListModel): # {{{ for i, plugin in enumerate(metadata_plugins(['cover'])): self.covers.append((plugin.name+'\n'+_('Searching...'), QVariant(self.blank), None, True)) - self.plugin_map[plugin] = i+1 + self.plugin_map[plugin] = [i+1] if do_reset: self.reset() @@ -685,48 +684,82 @@ class CoversModel(QAbstractListModel): # {{{ def plugin_for_index(self, index): row = index.row() if hasattr(index, 'row') else index for k, v in 
self.plugin_map.iteritems(): - if v == row: + if row in v: return k - def cover_keygen(self, x): - pmap = x[2] - if pmap is None: - return 1 - return pmap.width()*pmap.height() - def clear_failed(self): + # Remove entries that are still waiting good = [] pmap = {} - dcovers = sorted(self.covers[1:], key=self.cover_keygen, reverse=True) - cmap = {x:self.covers.index(x) for x in self.covers} + def keygen(x): + pmap = x[2] + if pmap is None: + return 1 + return pmap.width()*pmap.height() + dcovers = sorted(self.covers[1:], key=keygen, reverse=True) + cmap = {i:self.plugin_for_index(i) for i in xrange(len(self.covers))} for i, x in enumerate(self.covers[0:1] + dcovers): if not x[-1]: good.append(x) - if i > 0: - plugin = self.plugin_for_index(cmap[x]) - pmap[plugin] = len(good) - 1 + plugin = cmap[i] + if plugin is not None: + try: + pmap[plugin].append(len(good) - 1) + except KeyError: + pmap[plugin] = [len(good)-1] self.covers = good self.plugin_map = pmap self.reset() - def index_for_plugin(self, plugin): - idx = self.plugin_map.get(plugin, 0) - return self.index(idx) + def pointer_from_index(self, index): + row = index.row() if hasattr(index, 'row') else index + try: + return self.covers[row][2] + except IndexError: + pass + + def index_from_pointer(self, pointer): + for r, (text, scaled, pmap, waiting) in enumerate(self.covers): + if pointer == pmap: + return self.index(r) + return self.index(0) def update_result(self, plugin_name, width, height, data): - idx = None - for plugin, i in self.plugin_map.iteritems(): - if plugin.name == plugin_name: - idx = i - break - if idx is None: - return - pmap = QPixmap() - pmap.loadFromData(data) - if pmap.isNull(): - return - self.covers[idx] = self.get_item(plugin_name, pmap, waiting=False) - self.dataChanged.emit(self.index(idx), self.index(idx)) + if plugin_name.endswith('}'): + # multi cover plugin + plugin_name = plugin_name.partition('{')[0] + plugin = [plugin for plugin in self.plugin_map if plugin.name == plugin_name] 
+ if not plugin: + return + plugin = plugin[0] + last_row = max(self.plugin_map[plugin]) + pmap = QPixmap() + pmap.loadFromData(data) + if pmap.isNull(): + return + self.beginInsertRows(QModelIndex(), last_row, last_row) + for rows in self.plugin_map.itervalues(): + for i in xrange(len(rows)): + if rows[i] >= last_row: + rows[i] += 1 + self.plugin_map[plugin].insert(-1, last_row) + self.covers.insert(last_row, self.get_item(plugin_name, pmap, waiting=False)) + self.endInsertRows() + else: + # single cover plugin + idx = None + for plugin, rows in self.plugin_map.iteritems(): + if plugin.name == plugin_name: + idx = rows[0] + break + if idx is None: + return + pmap = QPixmap() + pmap.loadFromData(data) + if pmap.isNull(): + return + self.covers[idx] = self.get_item(plugin_name, pmap, waiting=False) + self.dataChanged.emit(self.index(idx), self.index(idx)) def cover_pixmap(self, index): row = index.row() @@ -774,9 +807,12 @@ class CoversView(QListView): # {{{ self.m.reset_covers() def clear_failed(self): - plugin = self.m.plugin_for_index(self.currentIndex()) + pointer = self.m.pointer_from_index(self.currentIndex()) self.m.clear_failed() - self.select(self.m.index_for_plugin(plugin).row()) + if pointer is None: + self.select(0) + else: + self.select(self.m.index_from_pointer(pointer).row()) # }}} @@ -852,10 +888,11 @@ class CoversWidget(QWidget): # {{{ if num < 2: txt = _('Could not find any covers for %s')%self.book.title else: - txt = _('Found %(num)d covers of %(title)s. ' - 'Pick the one you like best.')%dict(num=num-1, + txt = _('Found %(num)d possible covers for %(title)s. 
' + 'When the download completes, the covers will be sorted by size.')%dict(num=num-1, title=self.title) self.msg.setText(txt) + self.msg.setWordWrap(True) self.finished.emit() From 78cc12fe0e59745b89e7a3974133ca596ae39285 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Tue, 2 Apr 2013 00:16:33 +0200 Subject: [PATCH 03/22] minor fixes --- recipes/adventure_zone_pl.recipe | 1 - recipes/km_blog.recipe | 1 - recipes/sport_pl.recipe | 2 +- recipes/wirtualnemedia_pl.recipe | 2 +- recipes/wprost.recipe | 13 ++++------ recipes/wprost_rss.recipe | 42 ++++++++++++++++---------------- 6 files changed, 28 insertions(+), 33 deletions(-) diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe index 00b4a8753e..50a980dc92 100644 --- a/recipes/adventure_zone_pl.recipe +++ b/recipes/adventure_zone_pl.recipe @@ -66,4 +66,3 @@ class Adventure_zone(BasicNewsRecipe): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] return soup - diff --git a/recipes/km_blog.recipe b/recipes/km_blog.recipe index 614dbc03e5..8910ee060a 100644 --- a/recipes/km_blog.recipe +++ b/recipes/km_blog.recipe @@ -20,7 +20,6 @@ class km_blog(BasicNewsRecipe): remove_javascript=True no_stylesheets=True remove_empty_feeds = True - feeds = [(u'blog', u'http://korwin-mikke.pl/blog/rss')] keep_only_tags =[] diff --git a/recipes/sport_pl.recipe b/recipes/sport_pl.recipe index 622a3675bd..4095817a6b 100644 --- a/recipes/sport_pl.recipe +++ b/recipes/sport_pl.recipe @@ -21,7 +21,7 @@ class sport_pl(BasicNewsRecipe): remove_javascript=True no_stylesheets=True remove_empty_feeds = True - + ignore_duplicate_articles = {'title', 'url'} keep_only_tags =[] keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article'})) diff --git a/recipes/wirtualnemedia_pl.recipe b/recipes/wirtualnemedia_pl.recipe index 28278c2e24..ed3b3787f8 100644 --- a/recipes/wirtualnemedia_pl.recipe +++ 
b/recipes/wirtualnemedia_pl.recipe @@ -1,7 +1,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class WirtualneMedia(BasicNewsRecipe): - title = u'wirtualnemedia.pl' + title = u'Wirtualnemedia.pl' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True diff --git a/recipes/wprost.recipe b/recipes/wprost.recipe index 90dde251ca..d923f64a3f 100644 --- a/recipes/wprost.recipe +++ b/recipes/wprost.recipe @@ -1,10 +1,9 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2010, matek09, matek09@gmail.com' -__copyright__ = 'Modified 2011, Mariusz Wolek ' -__copyright__ = 'Modified 2012, Artur Stachecki ' - +__copyright__ = '''2010, matek09, matek09@gmail.com + Modified 2011, Mariusz Wolek + Modified 2012, Artur Stachecki ''' from calibre.web.feeds.news import BasicNewsRecipe import re @@ -16,12 +15,12 @@ class Wprost(BasicNewsRecipe): ICO_BLOCKED = 'http://www.wprost.pl/G/layout2/ico_blocked.png' title = u'Wprost' __author__ = 'matek09' - description = 'Weekly magazine' + description = u'Popularny tygodnik ogólnopolski - Wprost. Najlepszy wśród polskich tygodników - opiniotwórczy - społeczno-informacyjny - społeczno-kulturalny.' 
encoding = 'ISO-8859-2' no_stylesheets = True language = 'pl' remove_javascript = True - recursions = 0 + recursions = 0 remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) ''' @@ -94,5 +93,3 @@ class Wprost(BasicNewsRecipe): 'description' : '' }) return articles - - diff --git a/recipes/wprost_rss.recipe b/recipes/wprost_rss.recipe index bffbacc474..59c130fc75 100644 --- a/recipes/wprost_rss.recipe +++ b/recipes/wprost_rss.recipe @@ -1,10 +1,9 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2010, matek09, matek09@gmail.com' -__copyright__ = 'Modified 2011, Mariusz Wolek ' -__copyright__ = 'Modified 2012, Artur Stachecki ' +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '''2010, matek09, matek09@gmail.com + Modified 2011, Mariusz Wolek + Modified 2012, Artur Stachecki ''' from calibre.web.feeds.news import BasicNewsRecipe import re @@ -12,13 +11,14 @@ import re class Wprost(BasicNewsRecipe): title = u'Wprost (RSS)' __author__ = 'matek09' - description = 'Weekly magazine' + description = u'Portal informacyjny. Najświeższe wiadomości, najciekawsze komentarze i opinie. Blogi najlepszych publicystów.' 
encoding = 'ISO-8859-2' no_stylesheets = True language = 'pl' remove_javascript = True recursions = 0 use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} remove_empty_feeds = True remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) @@ -48,20 +48,20 @@ class Wprost(BasicNewsRecipe): #h2 {font-size: x-large; font-weight: bold} feeds = [(u'Tylko u nas', u'http://www.wprost.pl/rss/rss_wprostextra.php'), - (u'Wydarzenia', u'http://www.wprost.pl/rss/rss.php'), - (u'Komentarze', u'http://www.wprost.pl/rss/rss_komentarze.php'), - (u'Wydarzenia: Kraj', u'http://www.wprost.pl/rss/rss_kraj.php'), - (u'Komentarze: Kraj', u'http://www.wprost.pl/rss/rss_komentarze_kraj.php'), - (u'Wydarzenia: Świat', u'http://www.wprost.pl/rss/rss_swiat.php'), - (u'Komentarze: Świat', u'http://www.wprost.pl/rss/rss_komentarze_swiat.php'), - (u'Wydarzenia: Gospodarka', u'http://www.wprost.pl/rss/rss_gospodarka.php'), - (u'Komentarze: Gospodarka', u'http://www.wprost.pl/rss/rss_komentarze_gospodarka.php'), - (u'Wydarzenia: Życie', u'http://www.wprost.pl/rss/rss_zycie.php'), - (u'Komentarze: Życie', u'http://www.wprost.pl/rss/rss_komentarze_zycie.php'), - (u'Wydarzenia: Sport', u'http://www.wprost.pl/rss/rss_sport.php'), - (u'Komentarze: Sport', u'http://www.wprost.pl/rss/rss_komentarze_sport.php'), - (u'Przegląd prasy', u'http://www.wprost.pl/rss/rss_prasa.php') - ] + (u'Wydarzenia', u'http://www.wprost.pl/rss/rss.php'), + (u'Komentarze', u'http://www.wprost.pl/rss/rss_komentarze.php'), + (u'Wydarzenia: Kraj', u'http://www.wprost.pl/rss/rss_kraj.php'), + (u'Komentarze: Kraj', u'http://www.wprost.pl/rss/rss_komentarze_kraj.php'), + (u'Wydarzenia: Świat', u'http://www.wprost.pl/rss/rss_swiat.php'), + (u'Komentarze: Świat', u'http://www.wprost.pl/rss/rss_komentarze_swiat.php'), + (u'Wydarzenia: Gospodarka', u'http://www.wprost.pl/rss/rss_gospodarka.php'), + (u'Komentarze: 
Gospodarka', u'http://www.wprost.pl/rss/rss_komentarze_gospodarka.php'), + (u'Wydarzenia: Życie', u'http://www.wprost.pl/rss/rss_zycie.php'), + (u'Komentarze: Życie', u'http://www.wprost.pl/rss/rss_komentarze_zycie.php'), + (u'Wydarzenia: Sport', u'http://www.wprost.pl/rss/rss_sport.php'), + (u'Komentarze: Sport', u'http://www.wprost.pl/rss/rss_komentarze_sport.php'), + (u'Przegląd prasy', u'http://www.wprost.pl/rss/rss_prasa.php') + ] def get_cover_url(self): soup = self.index_to_soup('http://www.wprost.pl/tygodnik') From a5c3567aaee1313700ceb57683da2c9da2b2b071 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Tue, 2 Apr 2013 00:17:14 +0200 Subject: [PATCH 04/22] icon for gazeta prawna --- recipes/icons/gazeta-prawna-calibre-v1.png | Bin 0 -> 612 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 recipes/icons/gazeta-prawna-calibre-v1.png diff --git a/recipes/icons/gazeta-prawna-calibre-v1.png b/recipes/icons/gazeta-prawna-calibre-v1.png new file mode 100644 index 0000000000000000000000000000000000000000..e5c7ae965c05881d570b48288144382bfc846b69 GIT binary patch literal 612 zcmV-q0-ODbP)j(3cVl3Ow$a`%zezc7$sZK7J8vO)|gXu&3&uOGp$%?0KSpo}^6 z1T+Z%|K8064s{Ujy~fPQRnUX;x8OoWXc18foIZ+dTY!Ra9RRf5T~Hq-dA1v4XdGfu zWMma6p|p2^c?-fJ+>A}W_X5qSBH8{v+)@dEO$Xk0CiIY z{$!zGf#Z7Z6m6(BM~e1LjF3Rc~P?j|kZKj~zs5Sp%+v_3%E$&p%PtuL8@4x;o6!_n Date: Tue, 2 Apr 2013 00:23:56 +0200 Subject: [PATCH 05/22] new recipes from kalibrator project --- recipes/dzial_zagraniczny.recipe | 29 +++++++++ recipes/equipped.recipe | 28 +++++++++ recipes/icons/dzial_zagraniczny.png | Bin 0 -> 491 bytes recipes/icons/equipped.png | Bin 0 -> 929 bytes recipes/icons/ittechblog.png | Bin 0 -> 731 bytes recipes/icons/magazyn_consido.png | Bin 0 -> 982 bytes recipes/icons/media2.png | Bin 0 -> 660 bytes recipes/icons/mobilna.png | Bin 0 -> 885 bytes recipes/icons/mojegotowanie.png | Bin 0 -> 307 bytes recipes/icons/najwyzszy_czas.png | Bin 0 -> 616 bytes 
recipes/icons/nowiny_rybnik.png | Bin 0 -> 1179 bytes recipes/icons/osw.png | Bin 0 -> 489 bytes recipes/icons/ppe_pl.png | Bin 0 -> 3203 bytes recipes/icons/presseurop.png | Bin 0 -> 207 bytes recipes/icons/res_publica.png | Bin 0 -> 733 bytes recipes/icons/wolne_media.png | Bin 0 -> 497 bytes recipes/ittechblog.recipe | 27 +++++++++ recipes/magazyn_consido.recipe | 88 ++++++++++++++++++++++++++++ recipes/media2.recipe | 37 ++++++++++++ recipes/mobilna.recipe | 27 +++++++++ recipes/mojegotowanie.recipe | 51 ++++++++++++++++ recipes/najwyzszy_czas.recipe | 28 +++++++++ recipes/nowiny_rybnik.recipe | 33 +++++++++++ recipes/osw.recipe | 42 +++++++++++++ recipes/ppe_pl.recipe | 41 +++++++++++++ recipes/presseurop.recipe | 32 ++++++++++ recipes/res_publica.recipe | 34 +++++++++++ recipes/wolne_media.recipe | 27 +++++++++ 28 files changed, 524 insertions(+) create mode 100644 recipes/dzial_zagraniczny.recipe create mode 100644 recipes/equipped.recipe create mode 100644 recipes/icons/dzial_zagraniczny.png create mode 100644 recipes/icons/equipped.png create mode 100644 recipes/icons/ittechblog.png create mode 100644 recipes/icons/magazyn_consido.png create mode 100644 recipes/icons/media2.png create mode 100644 recipes/icons/mobilna.png create mode 100644 recipes/icons/mojegotowanie.png create mode 100644 recipes/icons/najwyzszy_czas.png create mode 100644 recipes/icons/nowiny_rybnik.png create mode 100644 recipes/icons/osw.png create mode 100644 recipes/icons/ppe_pl.png create mode 100644 recipes/icons/presseurop.png create mode 100644 recipes/icons/res_publica.png create mode 100644 recipes/icons/wolne_media.png create mode 100644 recipes/ittechblog.recipe create mode 100644 recipes/magazyn_consido.recipe create mode 100644 recipes/media2.recipe create mode 100644 recipes/mobilna.recipe create mode 100644 recipes/mojegotowanie.recipe create mode 100644 recipes/najwyzszy_czas.recipe create mode 100644 recipes/nowiny_rybnik.recipe create mode 100644 recipes/osw.recipe 
create mode 100644 recipes/ppe_pl.recipe create mode 100644 recipes/presseurop.recipe create mode 100644 recipes/res_publica.recipe create mode 100644 recipes/wolne_media.recipe diff --git a/recipes/dzial_zagraniczny.recipe b/recipes/dzial_zagraniczny.recipe new file mode 100644 index 0000000000..9709186d7e --- /dev/null +++ b/recipes/dzial_zagraniczny.recipe @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +dzialzagraniczny.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class dzial_zagraniczny(BasicNewsRecipe): + title = u'Dział Zagraniczny' + __author__ = 'teepel ' + language = 'pl' + description = u'Polskiego czytelnika to nie interesuje' + INDEX = 'http://dzialzagraniczny.pl' + extra_css = 'img {display: block;}' + oldest_article = 7 + cover_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-prn1/c145.5.160.160/559442_415653975115959_2126205128_n.jpg' + max_articles_per_feed = 100 + remove_empty_feeds = True + simultaneous_downloads = 5 + remove_javascript = True + no_stylesheets = True + use_embedded_content = True + + feeds = [(u'Dział zagraniczny', u'http://feeds.feedburner.com/dyndns/UOfz')] \ No newline at end of file diff --git a/recipes/equipped.recipe b/recipes/equipped.recipe new file mode 100644 index 0000000000..274315c849 --- /dev/null +++ b/recipes/equipped.recipe @@ -0,0 +1,28 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel , Artur Stachecki ' + +''' +equipped.pl +''' + +class equipped(AutomaticNewsRecipe): + title = u'Equipped' + __author__ = 'teepel ' + language = 'pl' + description = u'Wiadomości z equipped.pl' + INDEX = 'http://equipped.pl' + extra_css = '.alignleft {float:left; margin-right:5px;}' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + simultaneous_downloads = 5 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + #keep_only_tags = [dict(name='article')] + 
#remove_tags = [dict(id='disqus_thread')] + #remove_tags_after = [dict(id='disqus_thread')] + + feeds = [(u'Equipped', u'http://feeds.feedburner.com/Equippedpl?format=xml')] diff --git a/recipes/icons/dzial_zagraniczny.png b/recipes/icons/dzial_zagraniczny.png new file mode 100644 index 0000000000000000000000000000000000000000..1982db04626c0b9b7ca3e68271e5a57cc78eddfe GIT binary patch literal 491 zcmV}+am zBqAit%FL9OlpGoyR9aL-LPQJ-419Wgyt=$RIy9V~ob2-SBq}7#($qvsL~L?&6cQBN z+T5(FtTZw-R8myr=Hz5zWE>(K#LCP(LOhI)j06G%uiq9j00001bW%=J06^y0W&i*H z*-1n}R2Y>tk6m+uFc3x4H5B=11iO?35l5kojn$478X2|w|Nm8kjQ8ozdDy*&k&B6R zpd&d{Vnmnc%nG#FL?dX|zvzEvr?Z9j4lobO*WP6s4ulb4*?s3qttOQPNz4EoHXjWt zC7>uYu~gu7xyP`s!;Gs^vULCSer@<_=hfl10>fJTnI8Qtj^md#uTZ)h#j;bGxAW#l zdjrX2AZGocs!!fqcM*YmD}m3GnHT?UdJSfn-Xi}>7RjQ|^?jHlV3D71&To`+w%Ygo hCN=Z`MADeC&_AA+BlfHJf+qj~002ovPDHLkV1f&8*r5Ob literal 0 HcmV?d00001 diff --git a/recipes/icons/equipped.png b/recipes/icons/equipped.png new file mode 100644 index 0000000000000000000000000000000000000000..a532b6f6ac06623911045c44876e41b21628d96e GIT binary patch literal 929 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5)Mp*w z6XN>s-@pI=|KGfEbH{cN*s*Q8X=rHV z=jR_ea^%~$Z_!avj0_C+wswi}3GuOU@o{kp@d+O8?pBtT9UUE4uU`H2>sL%vG&2*^ z^5x5)J$rWV-aTeUriS|ZqeqS$I(YEVp+gfUOmKB^t*Wd9+V|+uqaQzhaC35Tb8)e= zu>+mW&c>#trBzo~_weCECI&_`Q?uZppuoUDAT~EQXJKX*7ZdaI^_?+e#*G^{e*XNq zWXY00fBqale!Q!z>&)rX(b3UNOiT+GF8uoS>w^am?%uuo_wQd#4GnucJ3CujAfc3`b2nh=b3W^8|3kwOWDyziB#BSQS>EnkFFP^`+eEISf zAii|z{Q2|OuU-4}>C@K_rAa`CGA4PuyDe`W}n%Z&llh&%NS5i_^O|znIS?#KFU=)9Jg>$tGSA)sNZAj47;OH!?pi&B9UgOP!uk*=YkuAzB|p^25Tg_WV1 zwt=CQfkDi+hnrC}}D}`9;MAiOE1)bV@_# x10@yUlEsO+1vx-R>!u|pm*f{Q7*!bQ7*-g9u;;I3TYzdAJYD@<);T3K0RWQ-R$Bl7 literal 0 HcmV?d00001 diff --git a/recipes/icons/ittechblog.png b/recipes/icons/ittechblog.png new file mode 100644 index 0000000000000000000000000000000000000000..825e0255109370f9e9e555c29f58145411096f57 GIT binary patch literal 731 
zcmV<10wn#3P)E4R$CTGm?Ys@0Y`yKv}@ zxh}fRTzSz4>tm#;F>MHbfRP3W7|xU+5Z*2fheTV?-T$2D{C3Xy?+^^bAX55ShoWC& zY}Og=HFnpk5CAgh6ibPWuW6{2_Q*u(x=23}26()Fc6*<;Yw-PSBLR?ztaBGEAjzf23}#n(tX(+WYdItlH6ESDwsF#uY}1yd zVgQil(jWCEM_DWYfDoBUr&uB*G7&vuURW;j2ZHPk0C&t_c9q7e&g3RrNTrOgi44l~ zctZxWQwac69O)DAQcd&r6-kLa>qU8yU@+3FEX>l7$M(t8079F9{PbbtoU04~7=|JJ zfgtOyYJYvl)dL6bU8y_FN~G2RfFKa6ua)#r(p-A{XmvsFz?@6)OQ zAQ)m2&fR)Cl%K;HH<(?ef%PEe{k6^}05sG{yT#dDJp}-&#)_7&iz{NEFTiF(4Ee>n zED3jHT5uPe7Tm?*HvwRmjIU`uU9JKmj>+o_Bk1U6Xj@i%4}zE0MIYg-x=o8>q@6RR0wFkW?`m$r>mX z3N3y3rw;;ceSxGu3fgYzuL^BMQ1BJoTC>TY)Iw>Iw5~}sZnF2@Gt)Vt_j2x;Gc)JA zGv7BScJ0X3N0U(isLyZjF36rRE28B4>1S_llMM$7xlSnk+&CqNCH>p?4gkcKnT2rb z%qnq)i}{zb;mJs>F15ZtSNc|5nPPTNvFk`*-~Piz=qhA0yLT2|aC_g_aDq=?gF@Q}9LPWlW$mp}j>5dH_zHO~* zVH;7isyv)LF;pEcH9+7g$pN^lR*z%z7&hN_n*KWTH8X3o0?h%FJm&OpNNcJQKbqGIeoZ_&VBdf6-A}*3vNAN;0EEDGue=<8aVs%l z4aSciuU+`wt32RHw7xUlGW5P&_@i$yhN2B)85?U{TXTD4Y1=cP)RlZ;`NdI#A~39k zil8O=cws?ZkHNAbkc=eB_6e}A|Xzj#xl5(4J>Bs-rpfaLiQOg1|@7J z)^8Byv;}jwekH{=NT-HDDL_kHUX~Mj3Jk3>AuMPnD#>WGgmIB%V*tuc`5ju4BG>?; zlvSPlebM4bdrD{#TYwjlNlG%X!assq3<)TNXJ;yRCd9L{r%R{XJs?H4giS6guWre_ zKp<(UUwV)tW8hRiIY~;1NZWoaIj`-lHiTwjVa~GsYhN$xh@-F( zw2V#C-~Yl2wV-RD?0B5QVGO)rdNw4DO7BKV^K3l-`qWnwLvzN?XLod$I`_T%A7%BG AtN;K2 literal 0 HcmV?d00001 diff --git a/recipes/icons/media2.png b/recipes/icons/media2.png new file mode 100644 index 0000000000000000000000000000000000000000..8e98c4df4eac0e7d071056862077e3a26a157654 GIT binary patch literal 660 zcmV;F0&D$=P)|#TY}iO{ zI9SPIPXW_SBkvaKEH@*EcMWw;@6K9Y{}8MQV^z($@w8JDz&v{Yi|5~Sezy8p$~2cJ z20MUzvn`%{4Gi~dQW0J(dXA1JY_BA30ReO8qh?com4R}?Xjz#064;wrwv`iRYM#MV zp(K)fL5na{=(ZL`Fk31mgoubo1l}$QGmDNifnuT|5i%=$TaO$cHIxgg+rL)mmbF;x zvCiYXr%we2j9`@DgJ&)C%&%AuY}1?_ALx)D&i)1jllhJA3ZMjn2&;Ldo(X5hOPtZ9M>|Bx|I&{T_>rVk`4;Z@CIT0YXW0us4(YJ?rtA^SwU!jPz5N}-T@M&DE4U{e{NzDA@?5BvXNB7 utye3oyGZ=^ls!X1qM%YRY%Xc6jr;{@lrN5sz~U+Z0000l4 literal 0 HcmV?d00001 diff --git a/recipes/icons/mobilna.png b/recipes/icons/mobilna.png new file mode 100644 index 
0000000000000000000000000000000000000000..30db9287be6eac66714fffea4e67b9dcea415b08 GIT binary patch literal 885 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5)aMxB z6XFU~aN*+>|ITNz7v5glx9sURpaW`degfJE1l2b_aqoC$cJx)^#rHsEPrv;E+HP_5HRq0J zO}9S>op}pnn;m*(e(06k8g5=-AZC{Y`GH~w3*g=H`Tm0k&$jT%UXPcSl`%UdD|cLu z(FjCIo_{36qgq3_%3eOVt8v%4%W_9!+vc{#%E^eIl94@a zt~Js8v@GA2XSZ+vd3x_Z&@RR#Z+91-#?67rKn`btM`SUO_5fqIli7Aa#%fO&#}J9j z$q7IpB0PPtI;jwoFWV_qM)ua_lY*!S@}ydrx0D zaic~0%yUCi~q(=xo&z*NBpo#FA92{lt BmFoZi literal 0 HcmV?d00001 diff --git a/recipes/icons/mojegotowanie.png b/recipes/icons/mojegotowanie.png new file mode 100644 index 0000000000000000000000000000000000000000..b9df6dc6d00f148aca415db0bed7467262851e83 GIT binary patch literal 307 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`zdT(WLo9leYvvyOJ-?CZK$Cw^ z$}jajv+k#SGBh|SvMI~T&;STFWLYI<{He@RJ$$>SuuZ=GnZX|22?-U^3XIc}o2*^$ z3ADPg#ZP-?_~-xU*$&HfLZ@P+HT%!F&(s zpoGPNjLNBcKmLC=KRo|;`U&F(SGWF$|G)S1$gBUHoZjH@Rgg{Zyb;^KM{j4e2`4i! z1Htro-Z^Jaob;c?_@eRD(_qh8j6aM-m6zU>WZ2m04#Z~MTny|MTjGa5D*Be)M4~0 zRa;-VIK7$d{~UB4uYcT#BO!?*qDY7YA`wIYKmk=HD1^|E1cbGFOG`;mrI%VF0cnSX zzr&+Y0M(WZ5J43Z4a#t68yt+8c)@Oq&Yg9qv2vsFQ74IC* zI$Z9Uos4;Of0?xh*C?GO&n-5$JiK$A$waVugZ0*f(h!u+Q#wcK9EEipbWb?$TW(#R z8~2BU0fn{Wy7CTGi3m#7HX8!VaFFq2{T+9g=Sk{@Jk7E*1RhjLLQqnZ5mW&%NsTdv z?e_<~UYupMGsE-ELrlL2gkotC`$lRQ}S~AJ1dJb*r9{60wAD`kWeL* z>iy_rnVcXqE$5}_}>VvKN3-xJQp`gZhgS1mlnv#1MTV( z$B2UTO+K}^e{xkuX5zG|{u)WOAsRVT6*>zS-u?mnq^_@X3saT=0000XmXtJzMM?%0hwA2}&<<|?41%*$ixfAv ztOdbM1SyDvKWGI-9NG*<8XMBQyd2ND$$fdx*NS)__nvdU`<fbvS(IjUbq*`A zDVi&{(fRNRbXZy@?IUe!7ig~CmC`zv7D`7$g_bD6mv_?N9>Oh=;!J74fBO=__ss~E z(fzfFjB92e=zO}K?|uvQ+ca}Xz4 zL4QyAyeAp$A0EIw0PaUIN8U?KsY8Rb5$*2*B1Cxq9Uyk`q9Kj`?-g|AJog#SpLcs7 zj)Aznm)KI@WChxFa|FHS8AH{MEb(__0?MsXoG84Jfe-ct^koqrwzlf6UE^eZj0};! 
y;%~lTZtTdbCwLQ62)=y;?>5Y?H$Dj`n~!yM`a$*m#*4~PC6s69i`ALM`@aDN!qLY7 literal 0 HcmV?d00001 diff --git a/recipes/icons/osw.png b/recipes/icons/osw.png new file mode 100644 index 0000000000000000000000000000000000000000..0693aee762bc1d096d00031294a76ac2685a7cbf GIT binary patch literal 489 zcmVDY3%JJ*h+KPqjWEz(E@){ z+vx;VJi_k)R#VOj+C{S7b$sr9skag(?R>Y= z+?@pOC#Ed$J;ePOcQQA-_MhN=Aai&d+j5~ItPi7VK8mKYe${zQJx-2BYYf+;U?NF6 f;5E(qLbv-rrqF>J7WGWp00000NkvXXu0mjf5R2hA literal 0 HcmV?d00001 diff --git a/recipes/icons/ppe_pl.png b/recipes/icons/ppe_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..42c9b42fa5092fa27c3e6ea8c36b6a142234dacd GIT binary patch literal 3203 zcmV-}41Dv6P)KLZ*U+IBfRsybQWXdwQbLP>6pAqfylh#{fb6;Z(vMMVS~$e@S=j*ftg6;Uhf59&ghTmgWD0l;*T zI709Y^p6lP1rIRMx#05C~cW=H_Aw*bJ-5DT&Z2n+x)QHX^p z00esgV8|mQcmRZ%02D^@S3L16t`O%c004NIvOKvYIYoh62rY33S640`D9%Y2D-rV&neh&#Q1i z007~1e$oCcFS8neI|hJl{-P!B1ZZ9hpmq0)X0i`JwE&>$+E?>%_LC6RbVIkUx0b+_+BaR3cnT7Zv!AJxW zizFb)h!jyGOOZ85F;a?DAXP{m@;!0_IfqH8(HlgRxt7s3}k3K`kFu>>-2Q$QMFfPW!La{h336o>X zu_CMttHv6zR;&ZNiS=X8v3CR#fknUxHUxJ0uoBa_M6WNWeqIg~6QE69c9o#eyhGvpiOA@W-aonk<7r1(?fC{oI5N*U!4 zfg=2N-7=cNnjjOr{yriy6mMFgG#l znCF=fnQv8CDz++o6_Lscl}eQ+l^ZHARH>?_s@|##Rr6KLRFA1%Q+=*RRWnoLsR`7U zt5vFIcfW3@?wFpwUVxrVZ>QdQz32KIeJ}k~{cZZE^+ya? 
z2D1z#2HOnI7(B%_ac?{wFUQ;QQA1tBKtrWrm0_3Rgps+?Jfqb{jYbcQX~taRB;#$y zZN{S}1|}gUOHJxc?wV3fxuz+mJ4`!F$IZ;mqRrNsHJd##*D~ju=bP7?-?v~|cv>vB zsJ6IeNwVZxrdjT`yl#bBIa#GxRa#xMMy;K#CDyyGyQdMSxlWT#tDe?p!?5wT$+oGt z8L;Kp2HUQ-ZMJ=3XJQv;x5ci*?vuTfeY$;({XGW_huIFR9a(?@3)XSs8O^N5RyOM=TTmp(3=8^+zpz2r)C z^>JO{deZfso3oq3?Wo(Y?l$ge?uXo;%ru`Vo>?<<(8I_>;8Eq#KMS9gFl*neeosSB zfoHYnBQIkwkyowPu(zdms`p{<7e4kra-ZWq<2*OsGTvEV%s0Td$hXT+!*8Bnh2KMe zBmZRodjHV?r+_5^X9J0WL4jKW`}lf%A-|44I@@LTvf1rHjG(ze6+w@Jt%Bvjts!X0 z?2xS?_ve_-kiKB_KiJlZ$9G`c^=E@oNG)mWWaNo-3TIW8)$Hg0Ub-~8?KhvJ>$ z3*&nim@mj(aCxE5!t{lw7O5^0EIO7zOo&c6l<+|iDySBWCGrz@C5{St!X3hAA}`T4 z(TLbXTq+(;@<=L8dXnssyft|w#WSTW<++3>sgS%(4NTpeI-VAqb|7ssJvzNHgOZVu zaYCvgO_R1~>SyL=cFU|~g|hy|Zi}}s9+d~lYqOB71z9Z$wnC=pR9Yz4DhIM>Wmjgu z&56o6maCpC&F##y%G;1PobR9i?GnNg;gYtchD%p19a!eQtZF&3JaKv33gZ<8D~47E ztUS1iwkmDaPpj=$m#%)jCVEY4fnLGNg2A-`YwHVD3gv};>)hAvT~AmqS>Lr``i7kw zJ{5_It`yrBmlc25DBO7E8;5VoznR>Ww5hAaxn$2~(q`%A-YuS64wkBy=9dm`4cXeX z4c}I@?e+FW+b@^RDBHV(wnMq2zdX3SWv9u`%{xC-q*U}&`cyXV(%rRT*Z6MH?i+i& z_B8C(+grT%{XWUQ+f@NoP1R=AW&26{v-dx)iK^-Nmiuj8txj!m?Z*Ss1N{dh4z}01 z)YTo*JycSU)+_5r4#yw9{+;i4Ee$peRgIj+;v;ZGdF1K$3E%e~4LaI(jC-u%2h$&R z9cLXcYC@Xwnns&bn)_Q~Te?roKGD|d-g^8;+aC{{G(1^(O7m37Y1-+6)01cN&y1aw zoqc{T`P^XJqPBbIW6s}d4{z_f5Om?vMgNQEJG?v2T=KYd^0M3I6IZxbny)%vZR&LD zJpPl@Psh8QyPB@KTx+@RdcC!KX7}kEo;S|j^u2lU7XQ}Oo;f|;z4Ll+_r>@1-xl3| zawq-H%e&ckC+@AhPrP6BKT#_XdT7&;F71j}Joy zkC~6lh7E@6o;W@^IpRNZ{ptLtL(gQ-CY~4mqW;US7Zxvm_|@yz&e53Bp_lTPlfP|z zrTyx_>lv@x#=^!PzR7qqF<$gm`|ZJZ+;<)Cqu&ot2z=00004XF*Lt006O$eEU(80000WV@Og>004R=004l4008;_004mL004C` z008P>0026e000+nl3&F}0004=NklBM1qE5NnXY?I4aq zkf@9hM3CSpYEUDz2FJjq*+p4x^lpGva(bl8A<3|HG}s z3K>W_3g+h3>gmx0t)p(T^H0IcvqlmhN6aMdBz&*)9qi+;8-6V6AGXOJY4+)4tizWL zi|oy2-Ztp;cW*r<%UF+U#n%nbt72Zvif%D1N<%R$ir1q@}qdTa*Ulk?0cV#HiSrj|GtzAMVAZ7|?9LCdD=_w0GP3Bz@q% zo0S)`w`G=%&)HykEQ!J;+ndU(E@Z8hRT#)CEJ@LucX!GCw3ZW!6+^wsg(Tiw^sK0p p3a^-;#yY2X%{J)aG^@P#YXBvlyOY4b$R_{*002ovPDHLkV1f>&`ZfRn literal 0 HcmV?d00001 diff --git 
a/recipes/icons/presseurop.png b/recipes/icons/presseurop.png new file mode 100644 index 0000000000000000000000000000000000000000..9967aac1fbe87108d8b86da4cf983ed27f00bec6 GIT binary patch literal 207 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!7%)r1n>(-?sK#oL!PlzjP-bUAnhyVQjx9-Bz zi%&i>#4fMga8;#lr^w|aCO|bso-U3d95Zcuy!o09cwE?5b>vFv1z2MXU>T`R4;m?c5O=UqKQT~ z*f;9`sQqGlF}9;$Z@$9jU(!;|bwB$TEz6D!vp$z0yjEsY8E^V=@ymaJwljFT`njxg HN@xNAOX^Zp literal 0 HcmV?d00001 diff --git a/recipes/icons/res_publica.png b/recipes/icons/res_publica.png new file mode 100644 index 0000000000000000000000000000000000000000..7c21e9d96e04836f6881267d90f20653017f3945 GIT binary patch literal 733 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5)TbQa z6XN>+|Nq}#zJGi1?%T_E-(S7|`}6nLr*H1A+JCWU#kHB6-d%n0@87?_zkZ)>TDaCY zb)8xIdh?7MbGE)Xc4f0;!8~5i*{sfUI9yljCO+MF?$@WU+x;u1GMG5? z_1Rk+tg>b>*v@3I+n>;Wb?W;4@$IX05*CU2?}==h#p*bp-)nbh{U(R}namE;8LV~$ zR{j3^{qOHT7kgLEWOSIt>^O(h^=#vUKRay>FX7$(Ocb1etrD<?c6kSqI)8osw!^n_27}F^^qzlz z{vJx}Ih5YJ)xGrY>H|;rpP$9zG>6?~qfO3_w;z9h`>`*kWgfT3TyFRG*B(6Bc=W-h zqp#21(hCoC0flptx4R2N2dk_Hki%Kv5n0T@z;^_M8K-LVNdpB}c)B=-NL)@%NJvRa zOH55jNPhm{$)mJ{ga;3wJ{A{95fBm+6cwJ%z~$lPdHTeuliDXvX=t6)VxIbG#fnv$ ztFkk)GGD)V^-`K)szi~rA3BXzX1(chO$v=EcI3ja?2L4Gp|_b}l2M zjg4sPo{9-9dunS^|NObg!gPU^=^~3rbCdI*i7Y8ooDLsSe!w&_FzKRU(jgAUwi`Qs z{Mf;g5?)FK#IZ0z|ct7&`{UFGQ`lx%GlV- q#7x`3(8|Ezb@?|@6b-rgDVb@NxHZhu@>>biz~JfX=d#Wzp$P!TjXop* literal 0 HcmV?d00001 diff --git a/recipes/icons/wolne_media.png b/recipes/icons/wolne_media.png new file mode 100644 index 0000000000000000000000000000000000000000..78d72713ab1d0d9e420527dd02205e3878d643ad GIT binary patch literal 497 zcmV(_`g8%^e{{R4h=>PzAFaQARU;qF*m;eA5Z<1fd zMgRZ-cS%G+RCwB)lQC}FKo9_Db&EivLLF(k%>(iRH!jlo0|H$58Tmv0kRL>VTmQgK z>bxMmY4Idlbbvc;3Q@AD7%`x&<`&q+Vs}nHef|C$@SGMw%WPZ$(-Rx0thzsqbLs(7 zia4hXSO7aJ>vJi^p!xU%-DV9yp5+66-n49OiyYM(AR^>hj<)TJtEQDbFQB(Vdf}H&)wZG)XZ2eYgCo(HqkUSK+lgfO~CsJ>vapjbUGn~ 
z7KoS`BLG!JRRI|=7*#dn_Vy><&+vZ7RW*Cr;(Kcr5xhS`WVeae4D2S@$NmDxoKk#~ z2Mx@O5FRnJ0lzE49_o6YGDk79+1t=nkqVR`v z6@{lL&KQjxMN#cW?m4B1h#+Rxbepv|vp0nq$%t|G@gUgbm28tN4=*bJU)g^Zr&sy% n+YjaRP+e}GYapKbo&OF1K5)1Xmq^9e00000NkvXXu0mjfImF8g literal 0 HcmV?d00001 diff --git a/recipes/ittechblog.recipe b/recipes/ittechblog.recipe new file mode 100644 index 0000000000..ba2bc8e045 --- /dev/null +++ b/recipes/ittechblog.recipe @@ -0,0 +1,27 @@ +__license__ = 'GPL v3' +__copyright__ = 'MrStefan' + +''' +www.ittechblog.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class ittechblog(BasicNewsRecipe): + title = u'IT techblog' + __author__ = 'MrStefan ' + language = 'pl' + description =u'Na naszym blogu technologicznym znajdziesz między innymi: testy sprzętu, najnowsze startupy, technologiczne nowinki, felietony tematyczne.' + extra_css = '.cover > img {display:block;}' + remove_empty_feeds = True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + + keep_only_tags =[dict(attrs={'class':'box'})] + remove_tags =[dict(name='aside'), dict(attrs={'class':['tags', 'counter', 'twitter-share-button']})] + + feeds = [(u'Artykuły', u'http://feeds.feedburner.com/ITTechBlog?format=xml')] diff --git a/recipes/magazyn_consido.recipe b/recipes/magazyn_consido.recipe new file mode 100644 index 0000000000..d24c66d6a4 --- /dev/null +++ b/recipes/magazyn_consido.recipe @@ -0,0 +1,88 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +''' +magazynconsido.pl/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.utils.magick import Image + +class magazynconsido(BasicNewsRecipe): + title = u'Magazyn Consido' + __author__ = 'Artur Stachecki ,teepel ' + language = 'pl' + description =u'Portal dla architektów i projektantów' + 
masthead_url='http://qualitypixels.pl/wp-content/themes/airlock/advance/inc/timthumb.php?src=http://qualitypixels.pl/wp-content/uploads/2012/01/logotyp-magazynconsido-11.png&w=455&zc=1' + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets = True + use_embedded_content = False + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'h1')) + keep_only_tags.append(dict(name = 'p')) + keep_only_tags.append(dict(attrs = {'class' : 'navigation'})) + remove_tags =[dict(attrs = {'style' : 'font-size: x-small;' })] + + remove_tags_after =[dict(attrs = {'class' : 'navigation' })] + + extra_css=''' img {max-width:30%; max-height:30%; display: block; margin-left: auto; margin-right: auto;} + h1 {text-align: center;}''' + + def parse_index(self): #(kk) + soup = self.index_to_soup('http://feeds.feedburner.com/magazynconsido?format=xml') + feeds = [] + articles = {} + sections = [] + section = '' + + for item in soup.findAll('item') : + section = self.tag_to_string(item.category) + if not articles.has_key(section) : + sections.append(section) + articles[section] = [] + article_url = self.tag_to_string(item.guid) + article_title = self.tag_to_string(item.title) + article_date = self.tag_to_string(item.pubDate) + article_description = self.tag_to_string(item.description) + articles[section].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date, 'description' : article_description }) + + for section in sections : + if section == 'Video': + feeds.append((section, articles[section])) + feeds.pop() + else: + feeds.append((section, articles[section])) + return feeds + + def append_page(self, soup, appendtag): + apage = soup.find('div', attrs={'class':'wp-pagenavi'}) + if apage is not None: + nexturl = soup.find('a', attrs={'class':'nextpostslink'}) + soup2 = self.index_to_soup(nexturl['href']) + pagetext = soup2.findAll('p') + for tag in pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, tag) + + while 
appendtag.find('div', attrs={'class': ['height: 35px;', 'post-meta', 'addthis_toolbox addthis_default_style addthis_', 'post-meta-bottom', 'block_recently_post', 'fbcomments', 'pin-it-button', 'pages', 'navigation']}) is not None: + appendtag.find('div', attrs={'class': ['height: 35px;', 'post-meta', 'addthis_toolbox addthis_default_style addthis_', 'post-meta-bottom', 'block_recently_post', 'fbcomments', 'pin-it-button', 'pages', 'navigation']}).replaceWith('') + + def preprocess_html(self, soup): #(kk) + self.append_page(soup, soup.body) + return self.adeify_images(soup) + + def postprocess_html(self, soup, first): + #process all the images + for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): + iurl = tag['src'] + img = Image() + img.open(iurl) + if img < 0: + raise RuntimeError('Out of memory') + img.type = "GrayscaleType" + img.save(iurl) + return soup diff --git a/recipes/media2.recipe b/recipes/media2.recipe new file mode 100644 index 0000000000..3c9ef3231e --- /dev/null +++ b/recipes/media2.recipe @@ -0,0 +1,37 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'teepel' + +''' +media2.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class media2_pl(BasicNewsRecipe): + title = u'Media2' + __author__ = 'teepel ' + language = 'pl' + description =u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.' 
+ masthead_url='http://media2.pl/res/logo/www.png' + remove_empty_feeds= True + oldest_article = 1 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + simultaneous_downloads = 5 + + extra_css = '''.news-lead{font-weight: bold; }''' + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-item tpl-big'})) + + remove_tags =[] + remove_tags.append(dict(name = 'span', attrs = {'class' : 'news-comments'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'item-sidebar'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-tags'})) + + + feeds = [(u'Media2', u'http://feeds.feedburner.com/media2')] diff --git a/recipes/mobilna.recipe b/recipes/mobilna.recipe new file mode 100644 index 0000000000..624a431935 --- /dev/null +++ b/recipes/mobilna.recipe @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'MrStefan' + +''' +www.mobilna.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class mobilna(BasicNewsRecipe): + title = u'Mobilna.pl' + __author__ = 'MrStefan ' + language = 'pl' + description =u'twoja mobilna strona' + #masthead_url='' + remove_empty_feeds= True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + use_embedded_content = True + #keep_only_tags =[dict(attrs={'class':'Post'})] + + feeds = [(u'Artykuły', u'http://mobilna.pl/feed/')] diff --git a/recipes/mojegotowanie.recipe b/recipes/mojegotowanie.recipe new file mode 100644 index 0000000000..e4d514b2a8 --- /dev/null +++ b/recipes/mojegotowanie.recipe @@ -0,0 +1,51 @@ +#!usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'MrStefan, teepel' + +''' +www.mojegotowanie.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class mojegotowanie(BasicNewsRecipe): + title = u'Moje Gotowanie' + __author__ = 'MrStefan , teepel ' + language = 'pl' + description =u'Gotowanie to Twoja pasja? 
Uwielbiasz sałatki? Lubisz grillować? Przepisy kulinarne doskonałe na wszystkie okazje znajdziesz na www.mojegotowanie.pl.' + masthead_url='http://www.mojegotowanie.pl/extension/selfstart/design/self/images/top_c2.gif' + cover_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/mgpl/mojegotowanie.gif' + remove_empty_feeds= True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'content'})) + + feeds = [(u'Artykuły', u'http://mojegotowanie.pl/rss/feed/artykuly'), + (u'Przepisy', u'http://mojegotowanie.pl/rss/feed/przepisy')] + + def parse_feeds(self): + feeds = BasicNewsRecipe.parse_feeds(self) + for feed in feeds: + for article in feed.articles[:]: + if 'film' in article.title: + feed.articles.remove(article) + return feeds + + def get_article_url(self, article): + link = article.get('link') + if 'Clayout0Cset0Cprint0' in link: + return link + + def print_version(self, url): + segment = url.split('/') + URLPart = segment[-2] + URLPart = URLPart.replace('0L0Smojegotowanie0Bpl0Clayout0Cset0Cprint0C', '/') + URLPart = URLPart.replace('0I', '_') + URLPart = URLPart.replace('0C', '/') + return 'http://www.mojegotowanie.pl/layout/set/print' + URLPart diff --git a/recipes/najwyzszy_czas.recipe b/recipes/najwyzszy_czas.recipe new file mode 100644 index 0000000000..6d8420d216 --- /dev/null +++ b/recipes/najwyzszy_czas.recipe @@ -0,0 +1,28 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +nczas.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class nczas(BasicNewsRecipe): + title = u'Najwy\u017cszy Czas' + __author__ = 'teepel ' + language = 'pl' + description ='Wiadomości z nczas.com' + INDEX='http://nczas.com' + oldest_article = 7 + max_articles_per_feed = 100 + use_embedded_content = True + remove_empty_feeds= True + simultaneous_downloads = 5 + 
remove_javascript=True + remove_attributes = ['style'] + no_stylesheets=True + + feeds = [(u'Najwyższy Czas', u'http://nczas.com/feed/')] diff --git a/recipes/nowiny_rybnik.recipe b/recipes/nowiny_rybnik.recipe new file mode 100644 index 0000000000..11337d49af --- /dev/null +++ b/recipes/nowiny_rybnik.recipe @@ -0,0 +1,33 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + + +class NowinyRybnik(BasicNewsRecipe): + title = u'Nowiny - Rybnik' + __author__ = 'Artur Stachecki ' + language = 'pl' + description = u'Tygodnik Regionalny NOWINY. Ogłoszenia drobne, wiadomości i wydarzenia z regionu Rybnika i okolic' + oldest_article = 7 + masthead_url = 'http://www.nowiny.rybnik.pl/logo/logo.jpg' + max_articles_per_feed = 100 + simultaneous_downloads = 5 + remove_javascript = True + no_stylesheets = True + + keep_only_tags = [(dict(name='div', attrs={'id': 'drukuj'}))] + + remove_tags = [] + remove_tags.append(dict(name='div', attrs={'id': 'footer'})) + + feeds = [(u'Wszystkie artykuły', u'http://www.nowiny.rybnik.pl/rss,artykuly,dzial,0,miasto,0,ile,25.xml')] + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup diff --git a/recipes/osw.recipe b/recipes/osw.recipe new file mode 100644 index 0000000000..5e5d7c6ef0 --- /dev/null +++ b/recipes/osw.recipe @@ -0,0 +1,42 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +http://www.osw.waw.pl - Osrodek studiow wschodnich +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class OSW_Recipe(BasicNewsRecipe): + + language = 'pl' + title = u'Ośrodek Studiów Wschodnich' + __author__ = 'teepel ' + INDEX='http://www.osw.waw.pl' + description = u'Ośrodek Studiów Wschodnich im. Marka Karpia. Centre for Eastern Studies.' 
+ category = u'News' + oldest_article = 7 + max_articles_per_feed = 100 + cover_url='' + remove_empty_feeds= True + no_stylesheets=True + remove_javascript = True + simultaneous_downloads = 5 + + keep_only_tags =[] + #this line should show title of the article, but it doesnt work + keep_only_tags.append(dict(name = 'h1', attrs = {'class' : 'print-title'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'print-submitted'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'print-content'})) + + remove_tags =[] + remove_tags.append(dict(name = 'table', attrs = {'id' : 'attachments'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'print-submitted'})) + + feeds = [(u'OSW', u'http://www.osw.waw.pl/pl/rss.xml')] + + def print_version(self, url): + return url.replace('http://www.osw.waw.pl/pl/', 'http://www.osw.waw.pl/pl/print/') diff --git a/recipes/ppe_pl.recipe b/recipes/ppe_pl.recipe new file mode 100644 index 0000000000..d1d01c2961 --- /dev/null +++ b/recipes/ppe_pl.recipe @@ -0,0 +1,41 @@ +import re + +from calibre.web.feeds.news import BasicNewsRecipe + +class ppeRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = u'Artur Stachecki ' + language = 'pl' + version = 1 + + title = u'ppe.pl' + category = u'News' + description = u'Portal o konsolach i grach wideo.' 
+ cover_url='' + remove_empty_feeds= True + no_stylesheets=True + oldest_article = 1 + max_articles_per_feed = 100000 + recursions = 0 + no_stylesheets = True + remove_javascript = True + simultaneous_downloads = 2 + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-heading'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'tresc-poziom'})) + + remove_tags =[] + remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria1'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria2'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria3'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-photo'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'fbl'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'info'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'links'})) + + remove_tags.append(dict(name = 'div', attrs = {'style' : 'padding: 4px'})) + + feeds = [ + ('Newsy', 'feed://ppe.pl/rss/rss.xml'), + ] diff --git a/recipes/presseurop.recipe b/recipes/presseurop.recipe new file mode 100644 index 0000000000..3a2f3209cf --- /dev/null +++ b/recipes/presseurop.recipe @@ -0,0 +1,32 @@ +#!/usr/bin/env python + +''' +www.presseurop.eu/pl +''' + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class presseurop(BasicNewsRecipe): + title = u'Presseurop' + description = u'Najlepsze artykuły z prasy europejskiej' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + + feeds = [ + (u'Polityka', u'http://www.presseurop.eu/pl/taxonomy/term/1/%2A/feed'), + (u'Społeczeństwo', u'http://www.presseurop.eu/pl/taxonomy/term/2/%2A/feed'), + (u'Gospodarka', u'http://www.presseurop.eu/pl/taxonomy/term/3/%2A/feed'), + (u'Kultura i debaty', u'http://www.presseurop.eu/pl/taxonomy/term/4/%2A/feed'), + (u'UE i Świat', 
u'http://www.presseurop.eu/pl/taxonomy/term/5/%2A/feed') + ] + + + preprocess_regexps = [ + (re.compile(r'\|.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), +] diff --git a/recipes/res_publica.recipe b/recipes/res_publica.recipe new file mode 100644 index 0000000000..29d7c558e7 --- /dev/null +++ b/recipes/res_publica.recipe @@ -0,0 +1,34 @@ +import re + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.utils.magick import Image + +class ResPublicaNowaRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = u'intromatyk ' + language = 'pl' + version = 1 + + title = u'Res Publica Nowa' + category = u'News' + description = u'Portal kulturalno-społecznego kwartalnika o profilu liberalnym, wydawany przez Fundację Res Publica' + cover_url='' + remove_empty_feeds= True + no_stylesheets=True + oldest_article = 7 + max_articles_per_feed = 100000 + recursions = 0 + no_stylesheets = True + remove_javascript = True + simultaneous_downloads = 5 + + feeds = [ + ('Artykuly', 'feed://publica.pl/feed'), + ] + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup diff --git a/recipes/wolne_media.recipe b/recipes/wolne_media.recipe new file mode 100644 index 0000000000..4dde5b22b5 --- /dev/null +++ b/recipes/wolne_media.recipe @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +wolnemedia.net +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class wolne_media(AutomaticNewsRecipe): + title = u'Wolne Media' + __author__ = 'teepel ' + language = 'pl' + description ='Wiadomości z wolnemedia.net' + INDEX='http://wolnemedia.net' + oldest_article = 1 + max_articles_per_feed = 100 + remove_empty_feeds= True + simultaneous_downloads = 5 + remove_javascript=True + no_stylesheets=True + auto_cleanup = True + + feeds = [(u'Wiadomości z kraju', 
u'http://wolnemedia.net/category/wiadomosci-z-kraju/feed/'),(u'Wiadomości ze świata', u'http://wolnemedia.net/category/wiadomosci-ze-swiata/feed/'),(u'Edukacja', u'http://wolnemedia.net/category/edukacja/feed/'),(u'Ekologia', u'http://wolnemedia.net/category/ekologia/feed/'),(u'Gospodarka', u'http://wolnemedia.net/category/gospodarka/feed/'),(u'Historia', u'http://wolnemedia.net/category/historia/feed/'),(u'Kultura', u'http://wolnemedia.net/category/kultura/feed/'),(u'Kulturoznawstwo', u'http://wolnemedia.net/category/kulturoznawstwo/feed/'),(u'Media', u'http://wolnemedia.net/category/media/feed/'),(u'Nauka', u'http://wolnemedia.net/category/nauka/feed/'),(u'Opowiadania', u'http://wolnemedia.net/category/opowiadania/feed/'),(u'Paranauka i ezoteryka', u'http://wolnemedia.net/category/ezoteryka/feed/'),(u'Polityka', u'http://wolnemedia.net/category/polityka/feed/'),(u'Prawo', u'http://wolnemedia.net/category/prawo/feed/'),(u'Publicystyka', u'http://wolnemedia.net/category/publicystyka/feed/'),(u'Reportaż', u'http://wolnemedia.net/category/reportaz/feed/'),(u'Seks', u'http://wolnemedia.net/category/seks/feed/'),(u'Społeczeństwo', u'http://wolnemedia.net/category/spoleczenstwo/feed/'),(u'Świat komputerów', u'http://wolnemedia.net/category/swiat-komputerow/feed/'),(u'Wierzenia', u'http://wolnemedia.net/category/wierzenia/feed/'),(u'Zdrowie', u'http://wolnemedia.net/category/zdrowie/feed/')] From dc568659e7df7a04dce3683d4b14aa1dfbefb593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Tue, 2 Apr 2013 00:37:23 +0200 Subject: [PATCH 06/22] changes by fenuks --- recipes/gazeta-prawna-calibre-v1.recipe | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/recipes/gazeta-prawna-calibre-v1.recipe b/recipes/gazeta-prawna-calibre-v1.recipe index 293aa05b0d..f7d2c4935b 100644 --- a/recipes/gazeta-prawna-calibre-v1.recipe +++ b/recipes/gazeta-prawna-calibre-v1.recipe @@ -14,13 +14,14 @@ class gazetaprawna(BasicNewsRecipe): title = 
u'Gazeta Prawna' __author__ = u'Vroo' publisher = u'Infor Biznes' - oldest_article = 7 + oldest_article = 1 max_articles_per_feed = 20 no_stylesheets = True remove_javascript = True description = 'Polski dziennik gospodarczy' language = 'pl' encoding = 'utf-8' + ignore_duplicate_articles = {'title', 'url'} remove_tags_after = [ dict(name='div', attrs={'class':['data-art']}) @@ -30,7 +31,7 @@ class gazetaprawna(BasicNewsRecipe): ] feeds = [ - (u'Wiadomo\u015bci - najwa\u017cniejsze', u'http://www.gazetaprawna.pl/wiadomosci/najwazniejsze/rss.xml'), + (u'Z ostatniej chwili', u'http://rss.gazetaprawna.pl/GazetaPrawna'), (u'Biznes i prawo gospodarcze', u'http://biznes.gazetaprawna.pl/rss.xml'), (u'Prawo i wymiar sprawiedliwo\u015bci', u'http://prawo.gazetaprawna.pl/rss.xml'), (u'Praca i ubezpieczenia', u'http://praca.gazetaprawna.pl/rss.xml'), @@ -51,3 +52,8 @@ class gazetaprawna(BasicNewsRecipe): url = url.replace('prawo.gazetaprawna', 'www.gazetaprawna') url = url.replace('praca.gazetaprawna', 'www.gazetaprawna') return url + + def get_cover_url(self): + soup = self.index_to_soup('http://www.egazety.pl/infor/e-wydanie-dziennik-gazeta-prawna.html') + self.cover_url = soup.find('p', attrs={'class':'covr'}).a['href'] + return getattr(self, 'cover_url', self.cover_url) From 1dfea2ab04fbe2ec3a0087871a8b69e8a0dc7fd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Tue, 2 Apr 2013 00:57:49 +0200 Subject: [PATCH 07/22] remove obsolete re ; update change Artur's nickname into real name --- recipes/dzial_zagraniczny.recipe | 6 ++---- recipes/focus_pl.recipe | 7 +++---- recipes/ittechblog.recipe | 1 - recipes/km_blog.recipe | 1 + recipes/kp.recipe | 3 +-- recipes/media2.recipe | 2 -- recipes/mobilna.recipe | 1 - recipes/mojegotowanie.recipe | 1 - recipes/najwyzszy_czas.recipe | 1 - recipes/nowiny_rybnik.recipe | 2 -- recipes/osw.recipe | 1 - recipes/ppe_pl.recipe | 6 +++--- recipes/res_publica.recipe | 6 ++++-- recipes/wolne_media.recipe | 1 - 14 files 
changed, 14 insertions(+), 25 deletions(-) diff --git a/recipes/dzial_zagraniczny.recipe b/recipes/dzial_zagraniczny.recipe index 9709186d7e..1b8453dd40 100644 --- a/recipes/dzial_zagraniczny.recipe +++ b/recipes/dzial_zagraniczny.recipe @@ -8,7 +8,6 @@ dzialzagraniczny.pl ''' from calibre.web.feeds.news import BasicNewsRecipe -import re class dzial_zagraniczny(BasicNewsRecipe): title = u'Dział Zagraniczny' @@ -21,9 +20,8 @@ class dzial_zagraniczny(BasicNewsRecipe): cover_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-prn1/c145.5.160.160/559442_415653975115959_2126205128_n.jpg' max_articles_per_feed = 100 remove_empty_feeds = True - simultaneous_downloads = 5 remove_javascript = True no_stylesheets = True use_embedded_content = True - - feeds = [(u'Dział zagraniczny', u'http://feeds.feedburner.com/dyndns/UOfz')] \ No newline at end of file + + feeds = [(u'Dział zagraniczny', u'http://feeds.feedburner.com/dyndns/UOfz')] diff --git a/recipes/focus_pl.recipe b/recipes/focus_pl.recipe index 66864b8561..e13e51a15a 100644 --- a/recipes/focus_pl.recipe +++ b/recipes/focus_pl.recipe @@ -1,12 +1,11 @@ -import re +#!/usr/bin/env python +__license__ = 'GPL v3' from calibre.web.feeds.news import BasicNewsRecipe - class FocusRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = u'intromatyk ' + __author__ = u'Artur Stachecki ' language = 'pl' version = 1 diff --git a/recipes/ittechblog.recipe b/recipes/ittechblog.recipe index ba2bc8e045..3fa557d11e 100644 --- a/recipes/ittechblog.recipe +++ b/recipes/ittechblog.recipe @@ -6,7 +6,6 @@ www.ittechblog.pl ''' from calibre.web.feeds.news import BasicNewsRecipe -import re class ittechblog(BasicNewsRecipe): title = u'IT techblog' diff --git a/recipes/km_blog.recipe b/recipes/km_blog.recipe index 8910ee060a..614dbc03e5 100644 --- a/recipes/km_blog.recipe +++ b/recipes/km_blog.recipe @@ -20,6 +20,7 @@ class km_blog(BasicNewsRecipe): remove_javascript=True no_stylesheets=True remove_empty_feeds = True + feeds = 
[(u'blog', u'http://korwin-mikke.pl/blog/rss')] keep_only_tags =[] diff --git a/recipes/kp.recipe b/recipes/kp.recipe index 85bf356b4d..3a2bc62eb0 100644 --- a/recipes/kp.recipe +++ b/recipes/kp.recipe @@ -2,8 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class KrytykaPolitycznaRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = u'intromatyk ' + __author__ = u'Artur Stachecki ' language = 'pl' version = 1 diff --git a/recipes/media2.recipe b/recipes/media2.recipe index 3c9ef3231e..135740a62e 100644 --- a/recipes/media2.recipe +++ b/recipes/media2.recipe @@ -8,7 +8,6 @@ media2.pl ''' from calibre.web.feeds.news import BasicNewsRecipe -import re class media2_pl(BasicNewsRecipe): title = u'Media2' @@ -33,5 +32,4 @@ class media2_pl(BasicNewsRecipe): remove_tags.append(dict(name = 'div', attrs = {'class' : 'item-sidebar'})) remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-tags'})) - feeds = [(u'Media2', u'http://feeds.feedburner.com/media2')] diff --git a/recipes/mobilna.recipe b/recipes/mobilna.recipe index 624a431935..68ae011438 100644 --- a/recipes/mobilna.recipe +++ b/recipes/mobilna.recipe @@ -8,7 +8,6 @@ www.mobilna.pl ''' from calibre.web.feeds.news import BasicNewsRecipe -import re class mobilna(BasicNewsRecipe): title = u'Mobilna.pl' diff --git a/recipes/mojegotowanie.recipe b/recipes/mojegotowanie.recipe index e4d514b2a8..4b0de4a0e1 100644 --- a/recipes/mojegotowanie.recipe +++ b/recipes/mojegotowanie.recipe @@ -8,7 +8,6 @@ www.mojegotowanie.pl ''' from calibre.web.feeds.news import BasicNewsRecipe -import re class mojegotowanie(BasicNewsRecipe): title = u'Moje Gotowanie' diff --git a/recipes/najwyzszy_czas.recipe b/recipes/najwyzszy_czas.recipe index 6d8420d216..9c4a82c4ea 100644 --- a/recipes/najwyzszy_czas.recipe +++ b/recipes/najwyzszy_czas.recipe @@ -8,7 +8,6 @@ nczas.com ''' from calibre.web.feeds.news import BasicNewsRecipe -import re class nczas(BasicNewsRecipe): title = u'Najwy\u017cszy Czas' diff --git 
a/recipes/nowiny_rybnik.recipe b/recipes/nowiny_rybnik.recipe index 11337d49af..e00a72e09b 100644 --- a/recipes/nowiny_rybnik.recipe +++ b/recipes/nowiny_rybnik.recipe @@ -3,8 +3,6 @@ __license__ = 'GPL v3' from calibre.web.feeds.news import BasicNewsRecipe -import re - class NowinyRybnik(BasicNewsRecipe): title = u'Nowiny - Rybnik' diff --git a/recipes/osw.recipe b/recipes/osw.recipe index 5e5d7c6ef0..8022f3e346 100644 --- a/recipes/osw.recipe +++ b/recipes/osw.recipe @@ -8,7 +8,6 @@ http://www.osw.waw.pl - Osrodek studiow wschodnich ''' from calibre.web.feeds.news import BasicNewsRecipe -import re class OSW_Recipe(BasicNewsRecipe): diff --git a/recipes/ppe_pl.recipe b/recipes/ppe_pl.recipe index d1d01c2961..2edc611ad7 100644 --- a/recipes/ppe_pl.recipe +++ b/recipes/ppe_pl.recipe @@ -1,12 +1,12 @@ -import re +#!/usr/bin/env python + +__license__ = 'GPL v3' from calibre.web.feeds.news import BasicNewsRecipe class ppeRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' __author__ = u'Artur Stachecki ' language = 'pl' - version = 1 title = u'ppe.pl' category = u'News' diff --git a/recipes/res_publica.recipe b/recipes/res_publica.recipe index 29d7c558e7..1c806d4a85 100644 --- a/recipes/res_publica.recipe +++ b/recipes/res_publica.recipe @@ -1,11 +1,13 @@ -import re +#!/usr/bin/env python + +__license__ = 'GPL v3' from calibre.web.feeds.news import BasicNewsRecipe from calibre.utils.magick import Image class ResPublicaNowaRecipe(BasicNewsRecipe): __license__ = 'GPL v3' - __author__ = u'intromatyk ' + __author__ = u'Artur Stachecki ' language = 'pl' version = 1 diff --git a/recipes/wolne_media.recipe b/recipes/wolne_media.recipe index 4dde5b22b5..b0e34c8bdf 100644 --- a/recipes/wolne_media.recipe +++ b/recipes/wolne_media.recipe @@ -8,7 +8,6 @@ wolnemedia.net ''' from calibre.web.feeds.news import BasicNewsRecipe -import re class wolne_media(AutomaticNewsRecipe): title = u'Wolne Media' From 354da83e7feac36a3bd33ce63da629b35af53dd5 Mon Sep 17 00:00:00 2001 From: Kovid 
Goyal Date: Tue, 2 Apr 2013 09:24:58 +0530 Subject: [PATCH 08/22] ToC Editor: Add buttons to indent/unindent the current entry --- src/calibre/gui2/toc/main.py | 180 ++++++++++++++++++++++++----------- 1 file changed, 127 insertions(+), 53 deletions(-) diff --git a/src/calibre/gui2/toc/main.py b/src/calibre/gui2/toc/main.py index de5ed91bcd..4e8d1f3424 100644 --- a/src/calibre/gui2/toc/main.py +++ b/src/calibre/gui2/toc/main.py @@ -339,7 +339,7 @@ class ItemView(QFrame): # {{{ # }}} -class TreeWidget(QTreeWidget): +class TreeWidget(QTreeWidget): # {{{ def __init__(self, parent): QTreeWidget.__init__(self, parent) @@ -357,6 +357,7 @@ class TreeWidget(QTreeWidget): self.setAnimated(True) self.setMouseTracking(True) self.in_drop_event = False + self.root = self.invisibleRootItem() def iteritems(self, parent=None): if parent is None: @@ -384,6 +385,104 @@ class TreeWidget(QTreeWidget): ans = sorted(ans, key=lambda x:sort_map.get(x, -1), reverse=True) return ans + def highlight_item(self, item): + self.setCurrentItem(item, 0, QItemSelectionModel.ClearAndSelect) + self.scrollToItem(item) + + def move_left(self): + item = self.currentItem() + if item is not None: + parent = item.parent() + if parent is not None: + is_expanded = item.isExpanded() or item.childCount() == 0 + gp = parent.parent() or self.invisibleRootItem() + idx = gp.indexOfChild(parent) + for gc in [parent.child(i) for i in xrange(parent.indexOfChild(item)+1, parent.childCount())]: + parent.removeChild(gc) + item.addChild(gc) + parent.removeChild(item) + gp.insertChild(idx+1, item) + if is_expanded: + self.expandItem(item) + self.highlight_item(item) + + def move_right(self): + item = self.currentItem() + if item is not None: + parent = item.parent() or self.invisibleRootItem() + idx = parent.indexOfChild(item) + if idx > 0: + is_expanded = item.isExpanded() + np = parent.child(idx-1) + parent.removeChild(item) + np.addChild(item) + if is_expanded: + self.expandItem(item) + self.highlight_item(item) + +
def move_down(self): + item = self.currentItem() + if item is None: + if self.root.childCount() == 0: + return + item = self.root.child(0) + self.highlight_item(item) + return + parent = item.parent() or self.root + idx = parent.indexOfChild(item) + if idx == parent.childCount() - 1: + # At end of parent, need to become sibling of parent + if parent is self.root: + return + gp = parent.parent() or self.root + parent.removeChild(item) + gp.insertChild(gp.indexOfChild(parent)+1, item) + else: + sibling = parent.child(idx+1) + parent.removeChild(item) + sibling.insertChild(0, item) + self.highlight_item(item) + + def move_up(self): + item = self.currentItem() + if item is None: + if self.root.childCount() == 0: + return + item = self.root.child(self.root.childCount()-1) + self.highlight_item(item) + return + parent = item.parent() or self.root + idx = parent.indexOfChild(item) + if idx == 0: + # At start of parent, need to become sibling of parent + if parent is self.root: + return + gp = parent.parent() or self.root + parent.removeChild(item) + gp.insertChild(gp.indexOfChild(parent), item) + else: + sibling = parent.child(idx-1) + parent.removeChild(item) + sibling.addChild(item) + self.highlight_item(item) + + def keyPressEvent(self, ev): + if ev.key() == Qt.Key_Left and ev.modifiers() & Qt.CTRL: + self.move_left() + ev.accept() + elif ev.key() == Qt.Key_Right and ev.modifiers() & Qt.CTRL: + self.move_right() + ev.accept() + elif ev.key() == Qt.Key_Up and ev.modifiers() & Qt.CTRL: + self.move_up() + ev.accept() + elif ev.key() == Qt.Key_Down and ev.modifiers() & Qt.CTRL: + self.move_down() + ev.accept() + else: + return super(TreeWidget, self).keyPressEvent(ev) +# }}} + class TOCView(QWidget): # {{{ add_new_item = pyqtSignal(object, object) @@ -393,27 +492,43 @@ class TOCView(QWidget): # {{{ l = self.l = QGridLayout() self.setLayout(l) self.tocw = t = TreeWidget(self) - l.addWidget(t, 0, 0, 5, 3) + l.addWidget(t, 0, 0, 7, 3) self.up_button = b = QToolButton(self)
b.setIcon(QIcon(I('arrow-up.png'))) b.setIconSize(QSize(ICON_SIZE, ICON_SIZE)) l.addWidget(b, 0, 3) - b.setToolTip(_('Move current entry up')) + b.setToolTip(_('Move current entry up [Ctrl+Up]')) b.clicked.connect(self.move_up) + + self.left_button = b = QToolButton(self) + b.setIcon(QIcon(I('back.png'))) + b.setIconSize(QSize(ICON_SIZE, ICON_SIZE)) + l.addWidget(b, 2, 3) + b.setToolTip(_('Unindent the current entry [Ctrl+Left]')) + b.clicked.connect(self.tocw.move_left) + self.del_button = b = QToolButton(self) b.setIcon(QIcon(I('trash.png'))) b.setIconSize(QSize(ICON_SIZE, ICON_SIZE)) - l.addWidget(b, 2, 3) + l.addWidget(b, 3, 3) b.setToolTip(_('Remove all selected entries')) b.clicked.connect(self.del_items) + + self.left_button = b = QToolButton(self) + b.setIcon(QIcon(I('forward.png'))) + b.setIconSize(QSize(ICON_SIZE, ICON_SIZE)) + l.addWidget(b, 4, 3) + b.setToolTip(_('Unindent the current entry [Ctrl+Left]')) + b.clicked.connect(self.tocw.move_right) + self.down_button = b = QToolButton(self) b.setIcon(QIcon(I('arrow-down.png'))) b.setIconSize(QSize(ICON_SIZE, ICON_SIZE)) - l.addWidget(b, 4, 3) - b.setToolTip(_('Move current entry down')) + l.addWidget(b, 6, 3) + b.setToolTip(_('Move current entry down [Ctrl+Down]')) b.clicked.connect(self.move_down) self.expand_all_button = b = QPushButton(_('&Expand all')) - col = 5 + col = 7 l.addWidget(b, col, 0) b.clicked.connect(self.tocw.expandAll) self.collapse_all_button = b = QPushButton(_('&Collapse all')) @@ -484,54 +599,13 @@ class TOCView(QWidget): # {{{ self.tocw.setCurrentItem(None) def highlight_item(self, item): - self.tocw.setCurrentItem(item, 0, QItemSelectionModel.ClearAndSelect) - self.tocw.scrollToItem(item) - - def move_down(self): - item = self.tocw.currentItem() - if item is None: - if self.root.childCount() == 0: - return - item = self.root.child(0) - self.highlight_item(item) - return - parent = item.parent() or self.root - idx = parent.indexOfChild(item) - if idx == parent.childCount() - 1: - # 
At end of parent, need to become sibling of parent - if parent is self.root: - return - gp = parent.parent() or self.root - parent.removeChild(item) - gp.insertChild(gp.indexOfChild(parent)+1, item) - else: - sibling = parent.child(idx+1) - parent.removeChild(item) - sibling.insertChild(0, item) - self.highlight_item(item) + self.tocw.highlight_item(item) def move_up(self): - item = self.tocw.currentItem() - if item is None: - if self.root.childCount() == 0: - return - item = self.root.child(self.root.childCount()-1) - self.highlight_item(item) - return - parent = item.parent() or self.root - idx = parent.indexOfChild(item) - if idx == 0: - # At end of parent, need to become sibling of parent - if parent is self.root: - return - gp = parent.parent() or self.root - parent.removeChild(item) - gp.insertChild(gp.indexOfChild(parent), item) - else: - sibling = parent.child(idx-1) - parent.removeChild(item) - sibling.addChild(item) - self.highlight_item(item) + self.tocw.move_up() + + def move_down(self): + self.tocw.move_down() def update_status_tip(self, item): c = item.data(0, Qt.UserRole).toPyObject() From 2eceaeb2aba9d3bde0d934da1066125e908574e0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Apr 2013 09:28:38 +0530 Subject: [PATCH 09/22] ... 
--- src/calibre/gui2/toc/main.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/toc/main.py b/src/calibre/gui2/toc/main.py index 4e8d1f3424..c54629b862 100644 --- a/src/calibre/gui2/toc/main.py +++ b/src/calibre/gui2/toc/main.py @@ -190,7 +190,7 @@ class ItemView(QFrame): # {{{ ))) l.addWidget(b) - self.fal = b = QPushButton(_('Flatten the ToC')) + self.fal = b = QPushButton(_('&Flatten the ToC')) b.clicked.connect(self.flatten_toc) b.setToolTip(textwrap.fill(_( 'Flatten the Table of Contents, putting all entries at the top level' @@ -466,6 +466,11 @@ class TreeWidget(QTreeWidget): # {{{ sibling.addChild(item) self.highlight_item(item) + def del_items(self): + for item in self.selectedItems(): + p = item.parent() or self.root + p.removeChild(item) + def keyPressEvent(self, ev): if ev.key() == Qt.Key_Left and ev.modifiers() & Qt.CTRL: self.move_left() @@ -479,6 +484,9 @@ class TreeWidget(QTreeWidget): # {{{ elif ev.key() == Qt.Key_Down and ev.modifiers() & Qt.CTRL: self.move_down() ev.accept() + elif ev.key() in (Qt.Key_Delete, Qt.Key_Backspace): + self.del_items() + ev.accept() else: return super(TreeWidget, self).keyPressEvent(ev) # }}} @@ -559,9 +567,7 @@ class TOCView(QWidget): # {{{ return unicode(item.data(0, Qt.DisplayRole).toString()) def del_items(self): - for item in self.tocw.selectedItems(): - p = item.parent() or self.root - p.removeChild(item) + self.tocw.del_items() def delete_current_item(self): item = self.tocw.currentItem() From 800f0c19ff9e4a3c1cebd1e4bc0f575b8171a8eb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Apr 2013 10:02:39 +0530 Subject: [PATCH 10/22] ToC Editor: Right-click menu to perform various useful actions on entries in the ToC --- src/calibre/gui2/toc/main.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/toc/main.py b/src/calibre/gui2/toc/main.py index c54629b862..74886bbf63 100644 --- 
a/src/calibre/gui2/toc/main.py +++ b/src/calibre/gui2/toc/main.py @@ -14,7 +14,7 @@ from functools import partial from PyQt4.Qt import (QPushButton, QFrame, QVariant, QMenu, QInputDialog, QDialog, QVBoxLayout, QDialogButtonBox, QSize, QStackedWidget, QWidget, QLabel, Qt, pyqtSignal, QIcon, QTreeWidget, QGridLayout, QTreeWidgetItem, - QToolButton, QItemSelectionModel) + QToolButton, QItemSelectionModel, QCursor) from calibre.ebooks.oeb.polish.container import get_container, AZW3Container from calibre.ebooks.oeb.polish.toc import ( @@ -358,6 +358,8 @@ class TreeWidget(QTreeWidget): # {{{ self.setMouseTracking(True) self.in_drop_event = False self.root = self.invisibleRootItem() + self.setContextMenuPolicy(Qt.CustomContextMenu) + self.customContextMenuRequested.connect(self.show_context_menu) def iteritems(self, parent=None): if parent is None: @@ -471,6 +473,12 @@ class TreeWidget(QTreeWidget): # {{{ p = item.parent() or self.root p.removeChild(item) + def title_case(self): + from calibre.utils.titlecase import titlecase + for item in self.selectedItems(): + t = unicode(item.data(0, Qt.DisplayRole).toString()) + item.setData(0, Qt.DisplayRole, titlecase(t)) + def keyPressEvent(self, ev): if ev.key() == Qt.Key_Left and ev.modifiers() & Qt.CTRL: self.move_left() @@ -489,6 +497,25 @@ class TreeWidget(QTreeWidget): # {{{ ev.accept() else: return super(TreeWidget, self).keyPressEvent(ev) + + def show_context_menu(self, point): + item = self.currentItem() + if item is not None: + m = QMenu() + ci = unicode(item.data(0, Qt.DisplayRole).toString()) + p = item.parent() or self.invisibleRootItem() + idx = p.indexOfChild(item) + if idx > 0: + m.addAction(QIcon(I('arrow-up.png')), _('Move "%s" up')%ci, self.move_up) + if idx + 1 < p.childCount(): + m.addAction(QIcon(I('arrow-down.png')), _('Move "%s" down')%ci, self.move_down) + m.addAction(QIcon(I('trash.png')), _('Remove all selected items'), self.del_items) + if item.parent() is not None: + m.addAction(QIcon(I('back.png')), 
_('Unindent "%s"')%ci, self.move_left) + if idx > 0: + m.addAction(QIcon(I('forward.png')), _('Indent "%s"')%ci, self.move_right) + m.addAction(_('Change all selected items to title case'), self.title_case) + m.exec_(QCursor.pos()) # }}} class TOCView(QWidget): # {{{ From 17743799bce332091ce2f4cc1be777aa13023090 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Apr 2013 10:13:14 +0530 Subject: [PATCH 11/22] Remove the google images plugin for now, will probably release it next week, after more testing --- src/calibre/customize/builtins.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index c87c8c63d0..474617c911 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -757,9 +757,9 @@ from calibre.ebooks.metadata.sources.isbndb import ISBNDB from calibre.ebooks.metadata.sources.overdrive import OverDrive from calibre.ebooks.metadata.sources.douban import Douban from calibre.ebooks.metadata.sources.ozon import Ozon -from calibre.ebooks.metadata.sources.google_images import GoogleImages +# from calibre.ebooks.metadata.sources.google_images import GoogleImages -plugins += [GoogleBooks, Amazon, Edelweiss, GoogleImages, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon] +plugins += [GoogleBooks, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon] # }}} From 01dedbd3574c88e0ccc68477f38bbc8fc36b8304 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Apr 2013 11:25:25 +0530 Subject: [PATCH 12/22] Fix #1163115 (samsung android phone not recognized) --- src/calibre/devices/android/driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 95a00a315c..36ab076417 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -239,7 +239,7 @@ class ANDROID(USBMS): 'ADVANCED', 'SGH-I727', 
'USB_FLASH_DRIVER', 'ANDROID', 'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E', 'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F', 'MT65XX_MS', - 'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1'] + 'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', From 42561905d7ec9e13431cac0d26e3749afd70cb06 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Apr 2013 12:51:51 +0530 Subject: [PATCH 13/22] Work on the view interface --- src/calibre/db/view.py | 127 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 118 insertions(+), 9 deletions(-) diff --git a/src/calibre/db/view.py b/src/calibre/db/view.py index e9de69e320..e0f99eede0 100644 --- a/src/calibre/db/view.py +++ b/src/calibre/db/view.py @@ -7,7 +7,9 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import weakref from functools import partial +from itertools import izip, imap def sanitize_sort_field_name(field_metadata, field): field = field_metadata.search_term_to_field_key(field.lower().strip()) @@ -15,11 +17,39 @@ def sanitize_sort_field_name(field_metadata, field): field = {'title': 'sort', 'authors':'author_sort'}.get(field, field) return field +class MarkedVirtualField(object): + + def __init__(self, marked_ids): + self.marked_ids = marked_ids + + def iter_searchable_values(self, get_metadata, candidates, default_value=None): + for book_id in candidates: + yield self.marked_ids.get(book_id, default_value), {book_id} + +class TableRow(list): + + def __init__(self, book_id, view): + self.book_id = book_id + self.view = weakref.ref(view) + + def __getitem__(self, obj): + view = self.view() + if isinstance(obj, slice): + return [view._field_getters[c](self.book_id) + for c in 
xrange(*obj.indices(len(view._field_getters)))] + else: + return view._field_getters[obj](self.book_id) + class View(object): + ''' A table view of the database, with rows and columns. Also supports + filtering and sorting. ''' + def __init__(self, cache): self.cache = cache self.marked_ids = {} + self.search_restriction_book_count = 0 + self.search_restriction = '' self._field_getters = {} for col, idx in cache.backend.FIELD_MAP.iteritems(): if isinstance(col, int): @@ -38,16 +68,33 @@ class View(object): except KeyError: self._field_getters[idx] = partial(self.get, col) - self._map = list(self.cache.all_book_ids()) - self._map_filtered = list(self._map) + self._map = tuple(self.cache.all_book_ids()) + self._map_filtered = tuple(self._map) @property def field_metadata(self): return self.cache.field_metadata def _get_id(self, idx, index_is_id=True): - ans = idx if index_is_id else self.index_to_id(idx) - return ans + return idx if index_is_id else self.index_to_id(idx) + + def __getitem__(self, row): + return TableRow(self._map_filtered[row], self.cache) + + def __len__(self): + return len(self._map_filtered) + + def __iter__(self): + for book_id in self._map_filtered: + yield self._data[book_id] + + def iterall(self): + for book_id in self._map: + yield self[book_id] + + def iterallids(self): + for book_id in self._map: + yield book_id def get_field_map_field(self, row, col, index_is_id=True): ''' @@ -66,7 +113,7 @@ class View(object): def get_ondevice(self, idx, index_is_id=True, default_value=''): id_ = idx if index_is_id else self.index_to_id(idx) - self.cache.field_for('ondevice', id_, default_value=default_value) + return self.cache.field_for('ondevice', id_, default_value=default_value) def get_marked(self, idx, index_is_id=True, default_value=None): id_ = idx if index_is_id else self.index_to_id(idx) @@ -93,7 +140,7 @@ class View(object): ans.append(self.cache._author_data(id_)) return tuple(ans) - def multisort(self, fields=[], subsort=False): + def 
multisort(self, fields=[], subsort=False, only_ids=None): fields = [(sanitize_sort_field_name(self.field_metadata, x), bool(y)) for x, y in fields] keys = self.field_metadata.sortable_field_keys() fields = [x for x in fields if x[0] in keys] @@ -102,8 +149,70 @@ class View(object): if not fields: fields = [('timestamp', False)] - sorted_book_ids = self.cache.multisort(fields) - sorted_book_ids - # TODO: change maps + sorted_book_ids = self.cache.multisort(fields, ids_to_sort=only_ids) + if only_ids is None: + self._map = tuple(sorted_book_ids) + if len(self._map_filtered) == len(self._map): + self._map_filtered = tuple(self._map) + else: + fids = frozenset(self._map_filtered) + self._map_filtered = tuple(i for i in self._map if i in fids) + else: + smap = {book_id:i for i, book_id in enumerate(sorted_book_ids)} + only_ids.sort(key=smap.get) + def search(self, query, return_matches=False): + ans = self.search_getting_ids(query, self.search_restriction, + set_restriction_count=True) + if return_matches: + return ans + self._map_filtered = tuple(ans) + + def search_getting_ids(self, query, search_restriction, + set_restriction_count=False): + q = '' + if not query or not query.strip(): + q = search_restriction + else: + q = query + if search_restriction: + q = u'(%s) and (%s)' % (search_restriction, query) + if not q: + if set_restriction_count: + self.search_restriction_book_count = len(self._map) + return list(self._map) + matches = self.cache.search( + query, search_restriction, virtual_fields={'marked':MarkedVirtualField(self.marked_ids)}) + rv = [x for x in self._map if x in matches] + if set_restriction_count and q == search_restriction: + self.search_restriction_book_count = len(rv) + return rv + + def set_search_restriction(self, s): + self.search_restriction = s + + def search_restriction_applied(self): + return bool(self.search_restriction) + + def get_search_restriction_book_count(self): + return self.search_restriction_book_count + + def 
set_marked_ids(self, id_dict): + ''' + ids in id_dict are "marked". They can be searched for by + using the search term ``marked:true``. Pass in an empty dictionary or + set to clear marked ids. + + :param id_dict: Either a dictionary mapping ids to values or a set + of ids. In the latter case, the value is set to 'true' for all ids. If + a mapping is provided, then the search can be used to search for + particular values: ``marked:value`` + ''' + if not hasattr(id_dict, 'items'): + # Simple list. Make it a dict of string 'true' + self.marked_ids = dict.fromkeys(id_dict, u'true') + else: + # Ensure that all the items in the dict are text + self.marked_ids = dict(izip(id_dict.iterkeys(), imap(unicode, + id_dict.itervalues()))) From 7b864ff15b0bf9730ba06e2bc7ba215f615b2514 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Tue, 2 Apr 2013 10:27:41 +0200 Subject: [PATCH 14/22] Remove subclass that seems not to work in some cases when the plugin is dynamically loaded --- .../gui2/store/stores/amazon_de_plugin.py | 40 ++++++++---------- .../gui2/store/stores/amazon_es_plugin.py | 39 ++++++++--------- .../gui2/store/stores/amazon_fr_plugin.py | 32 +++++++------- .../gui2/store/stores/amazon_it_plugin.py | 40 ++++++++---------- .../gui2/store/stores/amazon_uk_plugin.py | 42 ++++++++++--------- 5 files changed, 90 insertions(+), 103 deletions(-) diff --git a/src/calibre/gui2/store/stores/amazon_de_plugin.py b/src/calibre/gui2/store/stores/amazon_de_plugin.py index 7b4027794a..6833bd3710 100644 --- a/src/calibre/gui2/store/stores/amazon_de_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 2 # Needed for dynamic plugin loading +store_version = 3 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -18,13 +18,26 @@ from calibre import browser from calibre.gui2 import 
open_url from calibre.gui2.store.search_result import SearchResult +class AmazonDEKindleStore(StorePlugin): + ''' + For comments on the implementation, please see amazon_plugin.py + ''' + aff_id = {'tag': 'charhale0a-21'} + store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de' + '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454' + '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031') + store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8' + '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de' + '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742') + search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords=' -# This class is copy/pasted from amason_uk_plugin. Do not modify it in any -# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins -# when modified. + author_article = 'von ' + + and_word = ' und ' + + # ---- Copy from here to end -class AmazonEUBase(StorePlugin): ''' For comments on the implementation, please see amazon_plugin.py ''' @@ -108,20 +121,3 @@ class AmazonEUBase(StorePlugin): def get_details(self, search_result, timeout): pass -class AmazonDEKindleStore(AmazonEUBase): - ''' - For comments on the implementation, please see amazon_plugin.py - ''' - - aff_id = {'tag': 'charhale0a-21'} - store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de' - '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454' - '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031') - store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8' - '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de' - '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742') - search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords=' - - author_article = 'von ' - - and_word = ' und ' \ No newline at end of file diff --git a/src/calibre/gui2/store/stores/amazon_es_plugin.py 
b/src/calibre/gui2/store/stores/amazon_es_plugin.py index 68387ffe11..0b71ae657b 100644 --- a/src/calibre/gui2/store/stores/amazon_es_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_es_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 2 # Needed for dynamic plugin loading +store_version = 3 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -18,12 +18,25 @@ from calibre import browser from calibre.gui2 import open_url from calibre.gui2.store.search_result import SearchResult +class AmazonESKindleStore(StorePlugin): + ''' + For comments on the implementation, please see amazon_plugin.py + ''' -# This class is copy/pasted from amason_uk_plugin. Do not modify it in any -# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins -# when modified. + aff_id = {'tag': 'charhale09-21'} + store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&' + 'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790') + store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&' + 'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s' + '&linkCode=ur2&camp=3626&creative=24790') + search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords=' + + author_article = 'de ' + + and_word = ' y ' + + # ---- Copy from here to end -class AmazonEUBase(StorePlugin): ''' For comments on the implementation, please see amazon_plugin.py ''' @@ -107,19 +120,3 @@ class AmazonEUBase(StorePlugin): def get_details(self, search_result, timeout): pass -class AmazonESKindleStore(AmazonEUBase): - ''' - For comments on the implementation, please see amazon_plugin.py - ''' - - aff_id = {'tag': 'charhale09-21'} - store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&' - 'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790') - store_link_details 
= ('http://www.amazon.es/gp/redirect.html?ie=UTF8&' - 'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s' - '&linkCode=ur2&camp=3626&creative=24790') - search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords=' - - author_article = 'de ' - - and_word = ' y ' \ No newline at end of file diff --git a/src/calibre/gui2/store/stores/amazon_fr_plugin.py b/src/calibre/gui2/store/stores/amazon_fr_plugin.py index 9b425a2fc9..4520a3a104 100644 --- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 2 # Needed for dynamic plugin loading +store_version = 3 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -18,13 +18,22 @@ from calibre import browser from calibre.gui2 import open_url from calibre.gui2.store.search_result import SearchResult +class AmazonFRKindleStore(StorePlugin): + ''' + For comments on the implementation, please see amazon_plugin.py + ''' + aff_id = {'tag': 'charhale-21'} + store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id + store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' + search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords=' -# This class is copy/pasted from amason_uk_plugin. Do not modify it in any -# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins -# when modified. 
+ author_article = 'de ' + + and_word = ' et ' + + # ---- Copy from here to end -class AmazonEUBase(StorePlugin): ''' For comments on the implementation, please see amazon_plugin.py ''' @@ -108,16 +117,3 @@ class AmazonEUBase(StorePlugin): def get_details(self, search_result, timeout): pass -class AmazonFRKindleStore(AmazonEUBase): - ''' - For comments on the implementation, please see amazon_plugin.py - ''' - - aff_id = {'tag': 'charhale-21'} - store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id - store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' - search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords=' - - author_article = 'de ' - - and_word = ' et ' diff --git a/src/calibre/gui2/store/stores/amazon_it_plugin.py b/src/calibre/gui2/store/stores/amazon_it_plugin.py index 2493f78ea3..f8a756d1d5 100644 --- a/src/calibre/gui2/store/stores/amazon_it_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_it_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 2 # Needed for dynamic plugin loading +store_version = 3 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -18,12 +18,25 @@ from calibre import browser from calibre.gui2 import open_url from calibre.gui2.store.search_result import SearchResult +class AmazonITKindleStore(StorePlugin): + ''' + For comments on the implementation, please see amazon_plugin.py + ''' -# This class is copy/pasted from amason_uk_plugin. Do not modify it in any -# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins -# when modified. 
+ aff_id = {'tag': 'httpcharles07-21'} + store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&' + 'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322') + store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&' + 'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&' + 'linkCode=ur2&camp=3370&creative=23322') + search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords=' + + author_article = 'di ' + + and_word = ' e ' + + # ---- Copy from here to end -class AmazonEUBase(StorePlugin): ''' For comments on the implementation, please see amazon_plugin.py ''' @@ -106,20 +119,3 @@ class AmazonEUBase(StorePlugin): def get_details(self, search_result, timeout): pass - -class AmazonITKindleStore(AmazonEUBase): - ''' - For comments on the implementation, please see amazon_plugin.py - ''' - - aff_id = {'tag': 'httpcharles07-21'} - store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&' - 'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322') - store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&' - 'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&' - 'linkCode=ur2&camp=3370&creative=23322') - search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords=' - - author_article = 'di ' - - and_word = ' e ' \ No newline at end of file diff --git a/src/calibre/gui2/store/stores/amazon_uk_plugin.py b/src/calibre/gui2/store/stores/amazon_uk_plugin.py index 054072824b..f6082ac790 100644 --- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 2 # Needed for dynamic plugin loading +store_version = 3 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -19,11 +19,28 @@ from calibre.gui2.store 
import StorePlugin from calibre.gui2.store.search_result import SearchResult -# This class is copy/pasted from amason_uk_plugin. Do not modify it in any -# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins -# when modified. -class AmazonEUBase(StorePlugin): +class AmazonUKKindleStore(StorePlugin): + aff_id = {'tag': 'calcharles-21'} + store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&' + 'location=http://www.amazon.co.uk/Kindle-eBooks/b?' + 'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&' + 'linkCode=ur2&camp=1634&creative=19450') + store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&' + 'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&' + 'linkCode=ur2&camp=1634&creative=6738') + search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords=' + + author_article = 'by ' + + and_word = ' and ' + + # This code is copy/pasted from from here to the other amazon EU. Do not + # modify it in any other amazon EU plugin. Be sure to paste it into all + # other amazon EU plugins when modified. + + # ---- Copy from here to end + ''' For comments on the implementation, please see amazon_plugin.py ''' @@ -107,18 +124,3 @@ class AmazonEUBase(StorePlugin): def get_details(self, search_result, timeout): pass -class AmazonUKKindleStore(AmazonEUBase): - aff_id = {'tag': 'calcharles-21'} - store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&' - 'location=http://www.amazon.co.uk/Kindle-eBooks/b?' 
- 'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&' - 'linkCode=ur2&camp=1634&creative=19450') - store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&' - 'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&' - 'linkCode=ur2&camp=1634&creative=6738') - search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords=' - - author_article = 'by ' - - and_word = ' and ' - From 9161d924aad670573e5c997df65d9b215bc19be7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Apr 2013 14:00:16 +0530 Subject: [PATCH 15/22] Update Business Week --- recipes/bwmagazine.recipe | 65 ++++----------------------------------- 1 file changed, 6 insertions(+), 59 deletions(-) diff --git a/recipes/bwmagazine.recipe b/recipes/bwmagazine.recipe index d11861ce08..ae3197da81 100644 --- a/recipes/bwmagazine.recipe +++ b/recipes/bwmagazine.recipe @@ -37,68 +37,15 @@ class BusinessWeek(BasicNewsRecipe): , 'language' : language } - #remove_tags = [ - #dict(attrs={'class':'inStory'}) - #,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td']) - #,dict(attrs={'id':['inset','videoDisplay']}) - #] - #keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})] - remove_attributes = ['lang'] - match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*'] - feeds = [ - (u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'), - (u'Top News' , u'http://www.businessweek.com/rss/bwdaily.rss' ), - (u'Asia', u'http://www.businessweek.com/rss/asia.rss'), - (u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'), - (u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'), - (u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'), - (u'Europe', u'http://www.businessweek.com/rss/europe.rss'), - (u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'), - (u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'), - (u'Architecture', 
u'http://www.businessweek.com/rss/architecture.rss'), - (u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'), - (u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'), - (u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'), - (u'Technology', u'http://www.businessweek.com/rss/technology.rss'), - (u'Investing', u'http://rss.businessweek.com/bw_rss/investor'), - (u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'), - (u'Careers', u'http://rss.businessweek.com/bw_rss/careers'), - (u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'), - (u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'), - (u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'), + (u'Top Stories', u'http://www.businessweek.com/feeds/most-popular.rss'), ] - def get_article_url(self, article): - url = article.get('guid', None) - if 'podcasts' in url: - return None - if 'surveys' in url: - return None - if 'images' in url: - return None - if 'feedroom' in url: - return None - if '/magazine/toc/' in url: - return None - rurl, sep, rest = url.rpartition('?') - if rurl: - return rurl - return rest - def print_version(self, url): - if '/news/' in url or '/blog/ in url': - return url - rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/print/') - return rurl.replace('/investing/','/investor/') + soup = self.index_to_soup(url) + prntver = soup.find('li', attrs={'class':'print tracked'}) + rurl = prntver.find('a', href=True)['href'] + return rurl + - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - return soup - From f3257d9865dd8bf23e0cd4008b0c166737424485 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Apr 2013 18:55:19 +0530 Subject: [PATCH 16/22] Fix #1163272 (Text Bug in Content) --- 
src/calibre/translations/de.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/translations/de.po b/src/calibre/translations/de.po index b0e512c6bf..0655ceb4ee 100644 --- a/src/calibre/translations/de.po +++ b/src/calibre/translations/de.po @@ -22507,7 +22507,7 @@ msgstr "Autoren beginnend mit '%s'" #: /home/kovid/work/calibre/src/calibre/library/catalogs/epub_mobi_builder.py:3477 #, python-format msgid "Authors beginning with '%s'" -msgstr "Autoren beginnen mit mit %s" +msgstr "Autoren beginnen mit %s" #: /home/kovid/work/calibre/src/calibre/library/catalogs/epub_mobi_builder.py:3518 msgid "NCX for Recently Added" From 7076d8c2f5576f096ec89189d93b021bae4ce080 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Apr 2013 08:32:01 +0530 Subject: [PATCH 17/22] Fix #1163659 (Wrong filename output in error message when "Guide reference not found") --- src/calibre/ebooks/oeb/reader.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index 68db089073..d0474fa7e8 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -333,8 +333,8 @@ class OEBReader(object): guide = self.oeb.guide manifest = self.oeb.manifest for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'): - href = elem.get('href') - path = urlnormalize(urldefrag(href)[0]) + ref_href = elem.get('href') + path = urlnormalize(urldefrag(ref_href)[0]) if path not in manifest.hrefs: corrected_href = None for href in manifest.hrefs: @@ -342,12 +342,12 @@ class OEBReader(object): corrected_href = href break if corrected_href is None: - self.logger.warn(u'Guide reference %r not found' % href) + self.logger.warn(u'Guide reference %r not found' % ref_href) continue - href = corrected_href + ref_href = corrected_href typ = elem.get('type') if typ not in guide: - guide.add(typ, elem.get('title'), href) + guide.add(typ, elem.get('title'), ref_href) def 
_find_ncx(self, opf): result = xpath(opf, '/o2:package/o2:spine/@toc') From fe1f2c79259aad4286a7c9194e8ab341c66b495c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Apr 2013 09:58:58 +0530 Subject: [PATCH 18/22] ToC Editor: Allow generating the ToC directly from individual files inside the ebook. Useful for EPUBs that have individual chapters in single files. Fixes #1163520 (Request for new method to generate entries in ToC editor) --- src/calibre/ebooks/oeb/polish/toc.py | 29 ++++++++++++++++++++++++++++ src/calibre/gui2/toc/main.py | 21 +++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py index 3a72b837c8..c84dd1b094 100644 --- a/src/calibre/ebooks/oeb/polish/toc.py +++ b/src/calibre/ebooks/oeb/polish/toc.py @@ -262,6 +262,35 @@ def from_links(container): toc.remove(child) return toc +def find_text(node): + LIMIT = 200 + pat = re.compile(r'\s+') + for child in node: + if isinstance(child, etree._Element): + text = xml2text(child).strip() + text = pat.sub(' ', text) + if len(text) < 1: + continue + if len(text) > LIMIT: + # Look for less text in a child of this node, recursively + ntext = find_text(child) + return ntext or (text[:LIMIT] + '...') + else: + return text + +def from_files(container): + toc = TOC() + for spinepath in container.spine_items: + name = container.abspath_to_name(spinepath) + root = container.parsed(name) + body = XPath('//h:body')(root) + if not body: + continue + text = find_text(body[0]) + if text: + toc.add(text, name) + return toc + def add_id(container, name, loc): root = container.parsed(name) body = root.xpath('//*[local-name()="body"]')[0] diff --git a/src/calibre/gui2/toc/main.py b/src/calibre/gui2/toc/main.py index 74886bbf63..7cb4f9b462 100644 --- a/src/calibre/gui2/toc/main.py +++ b/src/calibre/gui2/toc/main.py @@ -18,7 +18,7 @@ from PyQt4.Qt import (QPushButton, QFrame, QVariant, QMenu, QInputDialog, from 
calibre.ebooks.oeb.polish.container import get_container, AZW3Container from calibre.ebooks.oeb.polish.toc import ( - get_toc, add_id, TOC, commit_toc, from_xpaths, from_links) + get_toc, add_id, TOC, commit_toc, from_xpaths, from_links, from_files) from calibre.gui2 import Application, error_dialog, gprefs from calibre.gui2.progress_indicator import ProgressIndicator from calibre.gui2.toc.location import ItemEdit @@ -126,6 +126,7 @@ class ItemView(QFrame): # {{{ go_to_root = pyqtSignal() create_from_xpath = pyqtSignal(object) create_from_links = pyqtSignal() + create_from_files = pyqtSignal() flatten_toc = pyqtSignal() def __init__(self, parent): @@ -183,6 +184,15 @@ class ItemView(QFrame): # {{{ ))) l.addWidget(b) + self.cfb = b = QPushButton(_('Generate ToC from &files')) + b.clicked.connect(self.create_from_files) + b.setToolTip(textwrap.fill(_( + 'Generate a Table of Contents from individual files in the book.' + ' Each entry in the ToC will point to the start of the file, the' + ' text of the entry will be the "first line" of text from the file.' 
+ ))) + l.addWidget(b) + self.xpb = b = QPushButton(_('Generate ToC from &XPath')) b.clicked.connect(self.create_from_user_xpath) b.setToolTip(textwrap.fill(_( @@ -577,6 +587,7 @@ class TOCView(QWidget): # {{{ i.add_new_item.connect(self.add_new_item) i.create_from_xpath.connect(self.create_from_xpath) i.create_from_links.connect(self.create_from_links) + i.create_from_files.connect(self.create_from_files) i.flatten_item.connect(self.flatten_item) i.flatten_toc.connect(self.flatten_toc) i.go_to_root.connect(self.go_to_root) @@ -778,6 +789,14 @@ class TOCView(QWidget): # {{{ _('No links were found that could be added to the Table of Contents.'), show=True) self.insert_toc_fragment(toc) + def create_from_files(self): + toc = from_files(self.ebook) + if len(toc) == 0: + return error_dialog(self, _('No items found'), + _('No files were found that could be added to the Table of Contents.'), show=True) + self.insert_toc_fragment(toc) + + # }}} class TOCEditor(QDialog): # {{{ From 3823f8da9f2b5c6c8d7f222a6ccf505ac43c0682 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 4 Apr 2013 09:49:14 +0530 Subject: [PATCH 19/22] Update A List Apart --- recipes/list_apart.recipe | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/recipes/list_apart.recipe b/recipes/list_apart.recipe index 35cbaad958..c11956110f 100644 --- a/recipes/list_apart.recipe +++ b/recipes/list_apart.recipe @@ -1,33 +1,23 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +# vim:fileencoding=UTF-8 +from __future__ import unicode_literals from calibre.web.feeds.news import BasicNewsRecipe class AListApart (BasicNewsRecipe): - __author__ = u'Marc Busqué ' + __author__ = 'Marc Busqué ' __url__ = 'http://www.lamarciana.com' - __version__ = '1.0' + __version__ = '2.0' __license__ = 'GPL v3' - __copyright__ = u'2012, Marc Busqué ' + __copyright__ = '2012, Marc Busqué ' title = u'A List Apart' - description = u'A List Apart Magazine (ISSN: 1534-0295) explores the 
design, development, and meaning of web content, with a special focus on web standards and best practices.' + description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices. This recipe retrieve articles and columns.' language = 'en' tags = 'web development, software' oldest_article = 120 remove_empty_feeds = True - no_stylesheets = True encoding = 'utf8' cover_url = u'http://alistapart.com/pix/alalogo.gif' - keep_only_tags = [ - dict(name='div', attrs={'id': 'content'}) - ] - remove_tags = [ - dict(name='ul', attrs={'id': 'metastuff'}), - dict(name='div', attrs={'class': 'discuss'}), - dict(name='div', attrs={'class': 'discuss'}), - dict(name='div', attrs={'id': 'learnmore'}), - ] - remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height'] - extra_css = u'img {max-width: 100%; display: block; margin: auto;} #authorbio img {float: left; margin-right: 2%;}' + extra_css = u'img {max-width: 100%; display: block; margin: auto;}' feeds = [ - (u'A List Apart', u'http://www.alistapart.com/site/rss'), + (u'A List Apart', u'http://feeds.feedburner.com/alistapart/abridged'), ] From d65fd352e9ecb56a7e31dda3ff239ba77a91d5ae Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 4 Apr 2013 10:38:11 +0530 Subject: [PATCH 20/22] Update Sing Tao Daily - Hong Kong and Apple Daily - Hong Kong. 
AM730 by Eddie Lau --- recipes/am730.recipe | 290 ++++++++++++++++++++++++++++ recipes/apple_daily.recipe | 386 ++++++++++++++++++++++++------------- recipes/singtaohk.recipe | 29 ++- 3 files changed, 553 insertions(+), 152 deletions(-) create mode 100644 recipes/am730.recipe diff --git a/recipes/am730.recipe b/recipes/am730.recipe new file mode 100644 index 0000000000..0fac4bea51 --- /dev/null +++ b/recipes/am730.recipe @@ -0,0 +1,290 @@ +# vim:fileencoding=UTF-8 +from __future__ import unicode_literals +__license__ = 'GPL v3' +__copyright__ = '2013, Eddie Lau' +__Date__ = '' +__HiResImg__ = True + +''' +Change Log: +2013/03/30 -- first version +''' + +from calibre import (__appname__, force_unicode, strftime) +from calibre.utils.date import now as nowf +import os, datetime, re +from calibre.web.feeds.recipes import BasicNewsRecipe +from contextlib import nested +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag +from calibre.ebooks.metadata.opf2 import OPFCreator +from calibre.ebooks.metadata.toc import TOC +from calibre.ebooks.metadata import MetaInformation +from calibre.utils.localization import canonicalize_lang + +class AppleDaily(BasicNewsRecipe): + title = u'AM730' + __author__ = 'Eddie Lau' + publisher = 'AM730' + oldest_article = 1 + max_articles_per_feed = 100 + auto_cleanup = False + language = 'zh' + encoding = 'utf-8' + auto_cleanup = False + remove_javascript = True + use_embedded_content = False + no_stylesheets = True + description = 'http://www.am730.com.hk' + category = 'Chinese, News, Hong Kong' + masthead_url = 'http://www.am730.com.hk/images/logo.jpg' + + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}' + keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}), + dict(name='div', 
attrs={'class':'thecontent wordsnap'}), + dict(name='a', attrs={'class':'lightboximg'})] + remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}), + dict(name='img', attrs={'src':'/images/am_endmark.gif'})] + + def get_dtlocal(self): + dt_utc = datetime.datetime.utcnow() + # convert UTC to local hk time - at HKT 6am, all news are available + return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24) + + def get_fetchdate(self): + if __Date__ <> '': + return __Date__ + else: + return self.get_dtlocal().strftime("%Y%m%d") + + def get_fetchformatteddate(self): + if __Date__ <> '': + return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8] + else: + return self.get_dtlocal().strftime("%Y-%m-%d") + + def get_fetchyear(self): + if __Date__ <> '': + return __Date__[0:4] + else: + return self.get_dtlocal().strftime("%Y") + + def get_fetchmonth(self): + if __Date__ <> '': + return __Date__[4:6] + else: + return self.get_dtlocal().strftime("%m") + + def get_fetchday(self): + if __Date__ <> '': + return __Date__[6:8] + else: + return self.get_dtlocal().strftime("%d") + + # Note: does not work with custom date given by __Date__ + def get_weekday(self): + return self.get_dtlocal().weekday() + + def populate_article_metadata(self, article, soup, first): + if first and hasattr(self, 'add_toc_thumbnail'): + picdiv = soup.find('img') + if picdiv is not None: + self.add_toc_thumbnail(article,picdiv['src']) + + def parse_index(self): + feeds = [] + soup = self.index_to_soup('http://www.am730.com.hk/') + ul = soup.find(attrs={'class':'nav-section'}) + sectionList = [] + for li in ul.findAll('li'): + a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False) + title = li.find('a').get('title', False).strip() + sectionList.append((title, a)) + for title, url in sectionList: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + return feeds + + def parse_section(self, url): + soup = 
self.index_to_soup(url) + items = soup.findAll(attrs={'style':'padding-bottom: 15px;'}) + current_articles = [] + for item in items: + a = item.find(attrs={'class':'t6 f14'}).find('a', href=True) + articlelink = 'http://www.am730.com.hk/' + a.get('href', True) + title = self.tag_to_string(a) + description = self.tag_to_string(item.find(attrs={'class':'t3 f14'})) + current_articles.append({'title': title, 'url': articlelink, 'description': description}) + return current_articles + + def preprocess_html(self, soup): + multia = soup.findAll('a') + for a in multia: + if not (a == None): + image = a.find('img') + if not (image == None): + if __HiResImg__: + image['src'] = image.get('src').replace('/thumbs/', '/') + caption = image.get('alt') + tag = Tag(soup, "photo", []) + tag2 = Tag(soup, "photocaption", []) + tag.insert(0, image) + if not caption == None: + tag2.insert(0, caption) + tag.insert(1, tag2) + a.replaceWith(tag) + return soup + + def create_opf(self, feeds, dir=None): + if dir is None: + dir = self.output_dir + title = self.short_title() + if self.output_profile.periodical_date_in_title: + title += strftime(self.timefmt) + mi = MetaInformation(title, [__appname__]) + mi.publisher = __appname__ + mi.author_sort = __appname__ + if self.publication_type: + mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title() + mi.timestamp = nowf() + article_titles, aseen = [], set() + for f in feeds: + for a in f: + if a.title and a.title not in aseen: + aseen.add(a.title) + article_titles.append(force_unicode(a.title, 'utf-8')) + + mi.comments = self.description + if not isinstance(mi.comments, unicode): + mi.comments = mi.comments.decode('utf-8', 'replace') + mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' + + '\n\n'.join(article_titles)) + + language = canonicalize_lang(self.language) + if language is not None: + mi.language = language + # This one affects the pub date shown in kindle title + #mi.pubdate = nowf() + # now appears 
to need the time field to be > 12.00noon as well + mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0) + opf_path = os.path.join(dir, 'index.opf') + ncx_path = os.path.join(dir, 'index.ncx') + + opf = OPFCreator(dir, mi) + # Add mastheadImage entry to section + mp = getattr(self, 'masthead_path', None) + if mp is not None and os.access(mp, os.R_OK): + from calibre.ebooks.metadata.opf2 import Guide + ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu()) + ref.type = 'masthead' + ref.title = 'Masthead Image' + opf.guide.append(ref) + + manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))] + manifest.append(os.path.join(dir, 'index.html')) + manifest.append(os.path.join(dir, 'index.ncx')) + + # Get cover + cpath = getattr(self, 'cover_path', None) + if cpath is None: + pf = open(os.path.join(dir, 'cover.jpg'), 'wb') + if self.default_cover(pf): + cpath = pf.name + if cpath is not None and os.access(cpath, os.R_OK): + opf.cover = cpath + manifest.append(cpath) + + # Get masthead + mpath = getattr(self, 'masthead_path', None) + if mpath is not None and os.access(mpath, os.R_OK): + manifest.append(mpath) + + opf.create_manifest_from_files_in(manifest) + for mani in opf.manifest: + if mani.path.endswith('.ncx'): + mani.id = 'ncx' + if mani.path.endswith('mastheadImage.jpg'): + mani.id = 'masthead-image' + + entries = ['index.html'] + toc = TOC(base_path=dir) + self.play_order_counter = 0 + self.play_order_map = {} + + + def feed_index(num, parent): + f = feeds[num] + for j, a in enumerate(f): + if getattr(a, 'downloaded', False): + adir = 'feed_%d/article_%d/'%(num, j) + auth = a.author + if not auth: + auth = None + desc = a.text_summary + if not desc: + desc = None + else: + desc = self.description_limiter(desc) + tt = a.toc_thumbnail if a.toc_thumbnail else None + entries.append('%sindex.html'%adir) + po = self.play_order_map.get(entries[-1], None) + if po is 
None: + self.play_order_counter += 1 + po = self.play_order_counter + parent.add_item('%sindex.html'%adir, None, + a.title if a.title else _('Untitled Article'), + play_order=po, author=auth, + description=desc, toc_thumbnail=tt) + last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep)) + for sp in a.sub_pages: + prefix = os.path.commonprefix([opf_path, sp]) + relp = sp[len(prefix):] + entries.append(relp.replace(os.sep, '/')) + last = sp + + if os.path.exists(last): + with open(last, 'rb') as fi: + src = fi.read().decode('utf-8') + soup = BeautifulSoup(src) + body = soup.find('body') + if body is not None: + prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last)))) + templ = self.navbar.generate(True, num, j, len(f), + not self.has_single_feed, + a.orig_url, __appname__, prefix=prefix, + center=self.center_navbar) + elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') + body.insert(len(body.contents), elem) + with open(last, 'wb') as fi: + fi.write(unicode(soup).encode('utf-8')) + if len(feeds) == 0: + raise Exception('All feeds are empty, aborting.') + + if len(feeds) > 1: + for i, f in enumerate(feeds): + entries.append('feed_%d/index.html'%i) + po = self.play_order_map.get(entries[-1], None) + if po is None: + self.play_order_counter += 1 + po = self.play_order_counter + auth = getattr(f, 'author', None) + if not auth: + auth = None + desc = getattr(f, 'description', None) + if not desc: + desc = None + feed_index(i, toc.add_item('feed_%d/index.html'%i, None, + f.title, play_order=po, description=desc, author=auth)) + + else: + entries.append('feed_%d/index.html'%0) + feed_index(0, toc) + + for i, p in enumerate(entries): + entries[i] = os.path.join(dir, p.replace('/', os.sep)) + opf.create_spine(entries) + opf.set_toc(toc) + + with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): + opf.render(opf_file, ncx_file) + diff --git a/recipes/apple_daily.recipe 
b/recipes/apple_daily.recipe index 763136c9b0..522427ed6a 100644 --- a/recipes/apple_daily.recipe +++ b/recipes/apple_daily.recipe @@ -1,161 +1,275 @@ -# -*- coding: utf-8 -*- -import re +# vim:fileencoding=UTF-8 +from __future__ import unicode_literals +__license__ = 'GPL v3' +__copyright__ = '2013, Eddie Lau' +__Date__ = '' + +from calibre import (__appname__, force_unicode, strftime) +from calibre.utils.date import now as nowf +import os, datetime, re from calibre.web.feeds.recipes import BasicNewsRecipe +from contextlib import nested +from calibre.ebooks.BeautifulSoup import BeautifulSoup +from calibre.ebooks.metadata.opf2 import OPFCreator +from calibre.ebooks.metadata.toc import TOC +from calibre.ebooks.metadata import MetaInformation +from calibre.utils.localization import canonicalize_lang class AppleDaily(BasicNewsRecipe): - - title = u'蘋果日報' - __author__ = u'蘋果日報' - __publisher__ = u'蘋果日報' - description = u'蘋果日報' - masthead_url = 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif' - language = 'zh_TW' - encoding = 'UTF-8' - timefmt = ' [%a, %d %b, %Y]' - needs_subscription = False + title = u'蘋果日報 (香港)' + __author__ = 'Eddie Lau' + publisher = '蘋果日報' + oldest_article = 1 + max_articles_per_feed = 100 + auto_cleanup = False + language = 'zh' + encoding = 'utf-8' + auto_cleanup = False remove_javascript = True - remove_tags_before = dict(name=['ul', 'h1']) - remove_tags_after = dict(name='form') - remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}), - dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']), - dict(name=['script', 'noscript', 'style', 'form'])] + use_embedded_content = False no_stylesheets = True - extra_css = ''' - @font-face {font-family: "uming", serif, sans-serif; src: url(res:///usr/share/fonts/truetype/arphic/uming.ttc); }\n - body {margin-right: 8pt; 
font-family: 'uming', serif;} - h1 {font-family: 'uming', serif, sans-serif} - ''' - #extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}' + description = 'http://hkm.appledaily.com/' + category = 'Chinese, News, Hong Kong' + masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png' - preprocess_regexps = [ - (re.compile(r'img.php?server=(?P[^&]+)&path=(?P[^&]+).*', re.DOTALL|re.IGNORECASE), - lambda match: 'http://' + match.group('server') + '/' + match.group('path')), - ] + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}' + keep_only_tags = [dict(name='div', attrs={'id':'content-article'})] + remove_tags = [dict(name='div', attrs={'class':'prev-next-btn'}), + dict(name='p', attrs={'class':'next'})] + + def get_dtlocal(self): + dt_utc = datetime.datetime.utcnow() + # convert UTC to local hk time - at HKT 6am, all news are available + return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24) + + def get_fetchdate(self): + if __Date__ <> '': + return __Date__ + else: + return self.get_dtlocal().strftime("%Y%m%d") + + def get_fetchformatteddate(self): + if __Date__ <> '': + return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8] + else: + return self.get_dtlocal().strftime("%Y-%m-%d") + + def get_fetchyear(self): + if __Date__ <> '': + return __Date__[0:4] + else: + return self.get_dtlocal().strftime("%Y") + + def get_fetchmonth(self): + if __Date__ <> '': + return __Date__[4:6] + else: + return self.get_dtlocal().strftime("%m") + + def get_fetchday(self): + if __Date__ <> '': + return __Date__[6:8] + else: + return self.get_dtlocal().strftime("%d") + + # Note: does not work with custom date given by __Date__ + def get_weekday(self): + return self.get_dtlocal().weekday() def get_cover_url(self): - 
return 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif' - - - #def get_browser(self): - #br = BasicNewsRecipe.get_browser(self) - #if self.username is not None and self.password is not None: - # br.open('http://www.nytimes.com/auth/login') - # br.select_form(name='login') - # br['USERID'] = self.username - # br['PASSWORD'] = self.password - # br.submit() - #return br - - def preprocess_html(self, soup): - #process all the images - for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): - iurl = tag['src'] - #print 'checking image: ' + iurl - - #img\.php?server\=(?P[^&]+)&path=(?P[^&]+) - p = re.compile(r'img\.php\?server=(?P[^&]+)&path=(?P[^&]+)', re.DOTALL|re.IGNORECASE) - - m = p.search(iurl) - - if m is not None: - iurl = 'http://' + m.group('server') + '/' + m.group('path') - #print 'working! new url: ' + iurl - tag['src'] = iurl - #else: - #print 'not good' - - for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')): - iurl = tag['href'] - #print 'checking image: ' + iurl - - #img\.php?server\=(?P[^&]+)&path=(?P[^&]+) - p = re.compile(r'img\.php\?server=(?P[^&]+)&path=(?P[^&]+)', re.DOTALL|re.IGNORECASE) - - m = p.search(iurl) - - if m is not None: - iurl = 'http://' + m.group('server') + '/' + m.group('path') - #print 'working! 
new url: ' + iurl - tag['href'] = iurl - #else: - #print 'not good' - - return soup + soup = self.index_to_soup('http://hkm.appledaily.com/') + cover = soup.find(attrs={'class':'top-news'}).get('src', False) + br = BasicNewsRecipe.get_browser(self) + try: + br.open(cover) + except: + cover = None + return cover + def populate_article_metadata(self, article, soup, first): + if first and hasattr(self, 'add_toc_thumbnail'): + picdiv = soup.find('img') + if picdiv is not None: + self.add_toc_thumbnail(article,picdiv['src']) def parse_index(self): - base = 'http://news.hotpot.hk/fruit' - soup = self.index_to_soup('http://news.hotpot.hk/fruit/index.php') + feeds = [] + soup = self.index_to_soup('http://hkm.appledaily.com/') + ul = soup.find(attrs={'class':'menu'}) + sectionList = [] + for li in ul.findAll('li'): + a = 'http://hkm.appledaily.com/' + li.find('a', href=True).get('href', False) + title = li.find('a', text=True).strip() + if not title == u'動新聞': + sectionList.append((title, a)) + for title, url in sectionList: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + return feeds - #def feed_title(div): - # return ''.join(div.findAll(text=True, recursive=False)).strip() + def parse_section(self, url): + soup = self.index_to_soup(url) + ul = soup.find(attrs={'class':'list'}) + current_articles = [] + for li in ul.findAll('li'): + a = li.find('a', href=True) + title = li.find('p', text=True).strip() + if a is not None: + current_articles.append({'title': title, 'url':'http://hkm.appledaily.com/' + a.get('href', False)}) + pass + return current_articles - articles = {} - key = None - ans = [] - for div in soup.findAll('li'): - key = div.find(text=True, recursive=True); - #if key == u'豪情': - # continue; + def create_opf(self, feeds, dir=None): + if dir is None: + dir = self.output_dir + title = self.short_title() + if self.output_profile.periodical_date_in_title: + title += strftime(self.timefmt) + mi = MetaInformation(title, 
[__appname__]) + mi.publisher = __appname__ + mi.author_sort = __appname__ + if self.publication_type: + mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title() + mi.timestamp = nowf() + article_titles, aseen = [], set() + for f in feeds: + for a in f: + if a.title and a.title not in aseen: + aseen.add(a.title) + article_titles.append(force_unicode(a.title, 'utf-8')) - print 'section=' + key + mi.comments = self.description + if not isinstance(mi.comments, unicode): + mi.comments = mi.comments.decode('utf-8', 'replace') + mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' + + '\n\n'.join(article_titles)) - articles[key] = [] + language = canonicalize_lang(self.language) + if language is not None: + mi.language = language + # This one affects the pub date shown in kindle title + #mi.pubdate = nowf() + # now appears to need the time field to be > 12.00noon as well + mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0) + opf_path = os.path.join(dir, 'index.opf') + ncx_path = os.path.join(dir, 'index.ncx') - ans.append(key) + opf = OPFCreator(dir, mi) + # Add mastheadImage entry to section + mp = getattr(self, 'masthead_path', None) + if mp is not None and os.access(mp, os.R_OK): + from calibre.ebooks.metadata.opf2 import Guide + ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu()) + ref.type = 'masthead' + ref.title = 'Masthead Image' + opf.guide.append(ref) - a = div.find('a', href=True) + manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))] + manifest.append(os.path.join(dir, 'index.html')) + manifest.append(os.path.join(dir, 'index.ncx')) - if not a: - continue + # Get cover + cpath = getattr(self, 'cover_path', None) + if cpath is None: + pf = open(os.path.join(dir, 'cover.jpg'), 'wb') + if self.default_cover(pf): + cpath = pf.name + if cpath is not None and os.access(cpath, os.R_OK): + opf.cover = cpath + 
manifest.append(cpath) - url = base + '/' + a['href'] - print 'url=' + url + # Get masthead + mpath = getattr(self, 'masthead_path', None) + if mpath is not None and os.access(mpath, os.R_OK): + manifest.append(mpath) - if not articles.has_key(key): - articles[key] = [] - else: - # sub page - subSoup = self.index_to_soup(url) + opf.create_manifest_from_files_in(manifest) + for mani in opf.manifest: + if mani.path.endswith('.ncx'): + mani.id = 'ncx' + if mani.path.endswith('mastheadImage.jpg'): + mani.id = 'masthead-image' - for subDiv in subSoup.findAll('li'): - subA = subDiv.find('a', href=True) - subTitle = subDiv.find(text=True, recursive=True) - subUrl = base + '/' + subA['href'] - - print 'subUrl' + subUrl - - articles[key].append( - dict(title=subTitle, - url=subUrl, - date='', - description='', - content='')) + entries = ['index.html'] + toc = TOC(base_path=dir) + self.play_order_counter = 0 + self.play_order_map = {} -# elif div['class'] in ['story', 'story headline']: -# a = div.find('a', href=True) -# if not a: -# continue -# url = re.sub(r'\?.*', '', a['href']) -# url += '?pagewanted=all' -# title = self.tag_to_string(a, use_alt=True).strip() -# description = '' -# pubdate = strftime('%a, %d %b') -# summary = div.find(True, attrs={'class':'summary'}) -# if summary: -# description = self.tag_to_string(summary, use_alt=False) -# -# feed = key if key is not None else 'Uncategorized' -# if not articles.has_key(feed): -# articles[feed] = [] -# if not 'podcasts' in url: -# articles[feed].append( -# dict(title=title, url=url, date=pubdate, -# description=description, -# content='')) -# ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2}) - ans = [(unicode(key), articles[key]) for key in ans if articles.has_key(key)] - return ans + def feed_index(num, parent): + f = feeds[num] + for j, a in enumerate(f): + if getattr(a, 'downloaded', False): + adir = 'feed_%d/article_%d/'%(num, j) + auth = a.author + if not auth: + auth 
= None + desc = a.text_summary + if not desc: + desc = None + else: + desc = self.description_limiter(desc) + tt = a.toc_thumbnail if a.toc_thumbnail else None + entries.append('%sindex.html'%adir) + po = self.play_order_map.get(entries[-1], None) + if po is None: + self.play_order_counter += 1 + po = self.play_order_counter + parent.add_item('%sindex.html'%adir, None, + a.title if a.title else _('Untitled Article'), + play_order=po, author=auth, + description=desc, toc_thumbnail=tt) + last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep)) + for sp in a.sub_pages: + prefix = os.path.commonprefix([opf_path, sp]) + relp = sp[len(prefix):] + entries.append(relp.replace(os.sep, '/')) + last = sp + if os.path.exists(last): + with open(last, 'rb') as fi: + src = fi.read().decode('utf-8') + soup = BeautifulSoup(src) + body = soup.find('body') + if body is not None: + prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last)))) + templ = self.navbar.generate(True, num, j, len(f), + not self.has_single_feed, + a.orig_url, __appname__, prefix=prefix, + center=self.center_navbar) + elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') + body.insert(len(body.contents), elem) + with open(last, 'wb') as fi: + fi.write(unicode(soup).encode('utf-8')) + if len(feeds) == 0: + raise Exception('All feeds are empty, aborting.') + + if len(feeds) > 1: + for i, f in enumerate(feeds): + entries.append('feed_%d/index.html'%i) + po = self.play_order_map.get(entries[-1], None) + if po is None: + self.play_order_counter += 1 + po = self.play_order_counter + auth = getattr(f, 'author', None) + if not auth: + auth = None + desc = getattr(f, 'description', None) + if not desc: + desc = None + feed_index(i, toc.add_item('feed_%d/index.html'%i, None, + f.title, play_order=po, description=desc, author=auth)) + + else: + entries.append('feed_%d/index.html'%0) + feed_index(0, toc) + + for i, p in enumerate(entries): + entries[i] = 
os.path.join(dir, p.replace('/', os.sep)) + opf.create_spine(entries) + opf.set_toc(toc) + + with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): + opf.render(opf_file, ncx_file) diff --git a/recipes/singtaohk.recipe b/recipes/singtaohk.recipe index d830381731..bb76c335a0 100644 --- a/recipes/singtaohk.recipe +++ b/recipes/singtaohk.recipe @@ -1,30 +1,30 @@ +# vim:fileencoding=UTF-8 +from __future__ import unicode_literals __license__ = 'GPL v3' -__copyright__ = '2011, Eddie Lau' +__copyright__ = '2011-2013, Eddie Lau' # data source: normal, mobile __Source__ = 'mobile' # please replace the following "True" with "False". (Default: True) __MakePeriodical__ = True # Turn below to True if your device supports display of CJK titles (Default: False) -__UseChineseTitle__ = False +__UseChineseTitle__ = True # Set it to False if you want to skip images (Default: True) __KeepImages__ = True # Set it to True if you want to include a summary in Kindle's article view (Default: False) -__IncludeSummary__ = False +__IncludeSummary__ = True # Set it to True if you want thumbnail images in Kindle's article view (Default: True) __IncludeThumbnails__ = True ''' Change Log: +2013/03/31 -- fix cover retrieval code and heading size, and remove   in summary 2011/12/29 -- first version done -TODO: -* use alternative source at http://m.singtao.com/index.php ''' from calibre.utils.date import now as nowf import os, datetime, re -from datetime import date from calibre.web.feeds.recipes import BasicNewsRecipe from contextlib import nested from calibre.ebooks.BeautifulSoup import BeautifulSoup @@ -41,7 +41,7 @@ class STHKRecipe(BasicNewsRecipe): title = 'Sing Tao Daily - Hong Kong' description = 'Hong Kong Chinese Newspaper (http://singtao.com)' category = 'Chinese, News, Hong Kong' - extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} 
td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:150%;}' + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:200%;}' masthead_url = 'http://upload.wikimedia.org/wikipedia/en/d/dd/Singtao-usa.png' if __Source__ == 'normal': keep_only_tags = [dict(name='td', attrs={'class':['bodyhead','bodytext']})] @@ -96,17 +96,13 @@ class STHKRecipe(BasicNewsRecipe): return self.get_dtlocal().strftime("%d") def get_cover_url(self): - #cover = 'http://singtao.com/media/a/a(2660).jpg' # for 2011/12/29 - base = 2660 - todaydate = date(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday())) - diff = todaydate - date(2011, 12, 29) - base = base + int(diff.total_seconds()/(3600*24)) - cover = 'http://singtao.com/media/a/a(' + str(base) +').jpg' + soup = self.index_to_soup('http://m.singtao.com/') + cover = soup.find(attrs={'class':'special'}).get('src', False) br = BasicNewsRecipe.get_browser(self) try: br.open(cover) except: - cover = 'http://singtao.com/images/stlogo.gif' + cover = None return cover def parse_index(self): @@ -289,11 +285,11 @@ class STHKRecipe(BasicNewsRecipe): # the text may or may not be enclosed in

tag paras = articlebody.findAll('p') if not paras: - paras = articlebody + paras = articlebody textFound = False for p in paras: if not textFound: - summary_candidate = self.tag_to_string(p).strip() + summary_candidate = self.tag_to_string(p).strip().replace(' ', '') if len(summary_candidate) > 0: summary_candidate = summary_candidate.replace(u'(\u661f\u5cf6\u65e5\u5831\u5831\u9053)', '', 1) article.summary = article.text_summary = summary_candidate @@ -489,3 +485,4 @@ class STHKRecipe(BasicNewsRecipe): + From b1cc151ed781ef549b2a5c71cc86ba92c312595e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 4 Apr 2013 12:38:29 +0530 Subject: [PATCH 21/22] Add libimobiledevice to linux builds --- setup/installer/linux/freeze2.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup/installer/linux/freeze2.py b/setup/installer/linux/freeze2.py index cbf0363fc9..44b8e81bff 100644 --- a/setup/installer/linux/freeze2.py +++ b/setup/installer/linux/freeze2.py @@ -47,6 +47,10 @@ binary_includes = [ '/usr/lib/libgthread-2.0.so.0', '/usr/lib/libpng14.so.14', '/usr/lib/libexslt.so.0', + # Ensure that libimobiledevice is compiled against openssl, not gnutls + '/usr/lib/libimobiledevice.so.3', + '/usr/lib/libusbmuxd.so.2', + '/usr/lib/libplist.so.1', MAGICK_PREFIX+'/lib/libMagickWand.so.5', MAGICK_PREFIX+'/lib/libMagickCore.so.5', '/usr/lib/libgcrypt.so.11', From 2d4746a39d33cc646125576bf4ac8b6e50179194 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 4 Apr 2013 12:52:49 +0530 Subject: [PATCH 22/22] Add libimobiledevice to OS X build --- setup/installer/osx/app/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup/installer/osx/app/main.py b/setup/installer/osx/app/main.py index 345b75f56f..2182038088 100644 --- a/setup/installer/osx/app/main.py +++ b/setup/installer/osx/app/main.py @@ -399,7 +399,8 @@ class Py2App(object): @flush def add_fontconfig(self): info('\nAdding fontconfig') - for x in ('fontconfig.1', 'freetype.6', 'expat.1'): + for 
x in ('fontconfig.1', 'freetype.6', 'expat.1', + 'plist.1', 'usbmuxd.2', 'imobiledevice.3'): src = os.path.join(SW, 'lib', 'lib'+x+'.dylib') self.install_dylib(src) dst = os.path.join(self.resources_dir, 'fonts')