From f7a247233678a9ef0b8d1da1163c91327b453f0b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 26 Sep 2013 18:31:54 +0530 Subject: [PATCH 01/88] Fix #1231419 [error in manual page for "Integrating the calibre content server into other servers"](https://bugs.launchpad.net/calibre/+bug/1231419) --- manual/server.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manual/server.rst b/manual/server.rst index 20d4434a28..b5aa203ea8 100644 --- a/manual/server.rst +++ b/manual/server.rst @@ -104,7 +104,7 @@ Save this adapter as :file:`calibre-wsgi-adpater.py` somewhere your server will Let's suppose that we want to use WSGI in Apache. First enable WSGI in Apache by adding the following to :file:`httpd.conf`:: - LoadModule proxy_module modules/mod_wsgi.so + LoadModule wsgi_module modules/mod_wsgi.so The exact technique for enabling the wsgi module will vary depending on your Apache installation. Once you have the proxy modules enabled, add the following rules to httpd.conf (or if you are using virtual hosts to the conf file for the virtual host in question:: From 8920cf6caed6de9fe99d18b01901602ff3406619 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 27 Sep 2013 08:38:29 +0530 Subject: [PATCH 02/88] Driver for Sunstech reader Fixes #1231590 [cannot connect my sunstech e-reader](https://bugs.launchpad.net/calibre/+bug/1231590) --- src/calibre/devices/teclast/driver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/teclast/driver.py b/src/calibre/devices/teclast/driver.py index 9e42b3dbde..22fafcdb10 100644 --- a/src/calibre/devices/teclast/driver.py +++ b/src/calibre/devices/teclast/driver.py @@ -20,9 +20,9 @@ class TECLAST_K3(USBMS): BCD = [0x0000, 0x0100] VENDOR_NAME = ['TECLAST', 'IMAGIN', 'RK28XX', 'PER3274B', 'BEBOOK', - 'RK2728', 'MR700'] + 'RK2728', 'MR700', 'CYBER'] WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['DIGITAL_PLAYER', 'TL-K5', - 'EREADER', 'USB-MSC', 'PER3274B', 'BEBOOK', 'USER'] + 
'EREADER', 'USB-MSC', 'PER3274B', 'BEBOOK', 'USER', 'BOOK'] MAIN_MEMORY_VOLUME_LABEL = 'K3 Main Memory' STORAGE_CARD_VOLUME_LABEL = 'K3 Storage Card' From ee8c9d5c919e7e41d122109990c7461b32d28b42 Mon Sep 17 00:00:00 2001 From: Charles Haley Date: Fri, 27 Sep 2013 17:05:33 +0200 Subject: [PATCH 03/88] Add a tweak to change the image compression quality used by the content server when it makes thumbnails. --- resources/default_tweaks.py | 7 +++++++ src/calibre/library/server/content.py | 10 ++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 51bb84883b..2cf8a4296b 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -550,3 +550,10 @@ highlight_virtual_library = 'yellow' # all available output formats to be present. restrict_output_formats = None +#: Set the thumbnail image quality used by the content server +# The quality of a thumbnail is largely controlled by the compression quality +# used when creating it. Set this to a larger number to improve the quality. +# Note that the thumbnails get much larger with larger compression quality +# numbers. 
+# The value can be between 50 and 99 +content_server_thumbnail_compression_quality = 70 \ No newline at end of file diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index cdc569f8b7..0c0405aa50 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -18,6 +18,7 @@ from calibre.utils.magick.draw import (save_cover_data_to, Image, thumbnail as generate_thumbnail) from calibre.utils.filenames import ascii_filename from calibre.ebooks.metadata.opf2 import metadata_to_opf +from calibre.utils.config import tweaks plugboard_content_server_value = 'content_server' plugboard_content_server_formats = ['epub', 'mobi', 'azw3'] @@ -175,8 +176,13 @@ class ContentServer(object): cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) if thumbnail: - return generate_thumbnail(cover, - width=thumb_width, height=thumb_height)[-1] + quality = tweaks['content_server_thumbnail_compression_quality'] + if quality < 50: + quality = 50 + elif quality > 99: + quality = 99 + return generate_thumbnail(cover, width=thumb_width, + height=thumb_height, compression_quality=quality)[-1] img = Image() img.load(cover) From 974efed829a7e6a14f1150e5be0c9fd38f9648f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 29 Sep 2013 14:59:39 +0200 Subject: [PATCH 04/88] Remove Bookoteka store, as they don't sell ebooks any more --- src/calibre/customize/builtins.py | 11 --- .../gui2/store/stores/bookoteka_plugin.py | 76 ------------------- 2 files changed, 87 deletions(-) delete mode 100644 src/calibre/gui2/store/stores/bookoteka_plugin.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 97f2135744..98888ff818 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1342,16 +1342,6 @@ class StoreBiblioStore(StoreBase): headquarters = 'BG' formats = ['EPUB, PDF'] -class StoreBookotekaStore(StoreBase): - name = 
'Bookoteka' - author = u'Tomasz Długosz' - description = u'E-booki w Bookotece dostępne są w formacie EPUB oraz PDF. Publikacje sprzedawane w Bookotece są objęte prawami autorskimi. Zobowiązaliśmy się chronić te prawa, ale bez ograniczania dostępu do książki użytkownikowi, który nabył ją w legalny sposób. Dlatego też Bookoteka stosuje tak zwany „watermarking transakcyjny” czyli swego rodzaju znaki wodne.' # noqa - actual_plugin = 'calibre.gui2.store.stores.bookoteka_plugin:BookotekaStore' - - drm_free_only = True - headquarters = 'PL' - formats = ['EPUB', 'PDF'] - class StoreCdpStore(StoreBase): name = 'Cdp.pl' author = u'Tomasz Długosz' @@ -1718,7 +1708,6 @@ plugins += [ StoreBNStore, StoreBeamEBooksDEStore, StoreBiblioStore, - StoreBookotekaStore, StoreChitankaStore, StoreCdpStore, StoreDieselEbooksStore, diff --git a/src/calibre/gui2/store/stores/bookoteka_plugin.py b/src/calibre/gui2/store/stores/bookoteka_plugin.py deleted file mode 100644 index 7c3b2e8242..0000000000 --- a/src/calibre/gui2/store/stores/bookoteka_plugin.py +++ /dev/null @@ -1,76 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 1 # Needed for dynamic plugin loading - -__license__ = 'GPL 3' -__copyright__ = '2011, Tomasz Długosz ' -__docformat__ = 'restructuredtext en' - -import urllib -from contextlib import closing - -from lxml import html - -from PyQt4.Qt import QUrl - -from calibre import browser, url_slash_cleaner -from calibre.gui2 import open_url -from calibre.gui2.store import StorePlugin -from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.search_result import SearchResult -from calibre.gui2.store.web_store_dialog import WebStoreDialog - -class BookotekaStore(BasicStoreConfig, StorePlugin): - - def open(self, parent=None, detail_item=None, external=False): - - url = 'http://bookoteka.pl/ebooki' - detail_url = None - - if detail_item: - detail_url = detail_item - - 
if external or self.config.get('open_external', False): - open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url))) - else: - d = WebStoreDialog(self.gui, url, parent, detail_url) - d.setWindowTitle(self.name) - d.set_tags(self.config.get('tags', '')) - d.exec_() - - def search(self, query, max_results=10, timeout=60): - url = 'http://bookoteka.pl/list?search=' + urllib.quote_plus(query) + '&cat=1&hp=1&type=1' - - br = browser() - - counter = max_results - with closing(br.open(url, timeout=timeout)) as f: - doc = html.fromstring(f.read()) - for data in doc.xpath('//li[@class="EBOOK"]'): - if counter <= 0: - break - - id = ''.join(data.xpath('.//a[@class="item_link"]/@href')) - if not id: - continue - - cover_url = ''.join(data.xpath('.//a[@class="item_link"]/img/@src')) - title = ''.join(data.xpath('.//div[@class="shelf_title"]/a/text()')) - author = ''.join(data.xpath('.//div[@class="shelf_authors"][1]/text()')) - price = ''.join(data.xpath('.//span[@class="EBOOK"]/text()')) - price = price.replace('.', ',') - formats = ', '.join(data.xpath('.//a[@class="fancybox protected"]/text()')) - - counter -= 1 - - s = SearchResult() - s.cover_url = 'http://bookoteka.pl' + cover_url - s.title = title.strip() - s.author = author.strip() - s.price = price - s.detail_item = 'http://bookoteka.pl' + id.strip() - s.drm = SearchResult.DRM_UNLOCKED - s.formats = formats.strip() - - yield s From e1d99a09aba143adf4f4475bdf7288ffc82ea069 Mon Sep 17 00:00:00 2001 From: Carlos Alves Date: Sun, 29 Sep 2013 13:45:11 -0300 Subject: [PATCH 05/88] Add some recipes of Uruguay. 
Add 10minutos.recipe (News of Salto) Add diario_el_pueblo.recipe (News of Salto) Add diario_salto.recipe (News of Salto) Add unoticias.recipe (News of Montevideo) --- recipes/10minutos.recipe | 50 +++++++++++++++++++++++++++++ recipes/diario_el_pueblo.recipe | 51 ++++++++++++++++++++++++++++++ recipes/diario_salto.recipe | 50 +++++++++++++++++++++++++++++ recipes/unoticias.recipe | 56 +++++++++++++++++++++++++++++++++ 4 files changed, 207 insertions(+) create mode 100644 recipes/10minutos.recipe create mode 100644 recipes/diario_el_pueblo.recipe create mode 100644 recipes/diario_salto.recipe create mode 100644 recipes/unoticias.recipe diff --git a/recipes/10minutos.recipe b/recipes/10minutos.recipe new file mode 100644 index 0000000000..4c2f8a7ec7 --- /dev/null +++ b/recipes/10minutos.recipe @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2013, Carlos Alves ' +''' +10minutos.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = '10minutos' + __author__ = 'Carlos Alves' + description = 'Noticias de Salto - Uruguay' + tags = 'news, sports' + language = 'es_UY' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(name='div', attrs={'class':'post-content'})] + + remove_tags = [ + dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}), + dict(name='p', attrs={'class':'post-meta'}), + dict(name=['object','link']) + ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Articulos', 
u'http://10minutos.com.uy/feed/') + ] + + def get_cover_url(self): + return 'http://10minutos.com.uy/a/img/logo.png' + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/recipes/diario_el_pueblo.recipe b/recipes/diario_el_pueblo.recipe new file mode 100644 index 0000000000..4cfab9eb32 --- /dev/null +++ b/recipes/diario_el_pueblo.recipe @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2013, Carlos Alves ' +''' +diarioelpueblo.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'Diario El Pueblo' + __author__ = 'Carlos Alves' + description = 'Noticias de Salto - Uruguay' + tags = 'news, sports' + language = 'es_UY' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(name='div', attrs={'class':'post-alt blog'})] + + remove_tags = [ + dict(name='div', attrs={'class':['hr', 'titlebar', 'volver-arriba-right','navigation']}), + dict(name='div', attrs={'id':'comment','id':'suckerfish','id':'crp_related'}), + dict(name='h3', attrs={'class':['post_date']}), + dict(name=['object','link']) + ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Articulos', u'http://www.diarioelpueblo.com.uy/feed') + ] + + def get_cover_url(self): + return 'http://www.diarioelpueblo.com.uy/wp-content/uploads/2013/06/Cabezal_Web1.jpg' + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git 
a/recipes/diario_salto.recipe b/recipes/diario_salto.recipe new file mode 100644 index 0000000000..799233db4d --- /dev/null +++ b/recipes/diario_salto.recipe @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2013, Carlos Alves ' +''' +diarisalto.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'Diario Salto' + __author__ = 'Carlos Alves' + description = 'Noticias de Salto - Uruguay' + tags = 'news, sports' + language = 'es_UY' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(name='div', attrs={'class':'post'})] + + remove_tags = [ + dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}), + dict(name='div', attrs={'id':'comment'}), + dict(name=['object','link']) + ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Articulos', u'http://www.diariosalto.com.uy/feed/atom') + ] + + def get_cover_url(self): + return 'http://diariosalto.com.uy/demo/wp-content/uploads/2011/12/diario-salto_logo-final-b-b.png' + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/recipes/unoticias.recipe b/recipes/unoticias.recipe new file mode 100644 index 0000000000..f7d1c7693c --- /dev/null +++ b/recipes/unoticias.recipe @@ -0,0 +1,56 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2013, Carlos Alves ' +''' +unoticias.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 
'UNoticias' + __author__ = 'Carlos Alves' + description = 'Noticias Uruguay' + tags = 'news, sports, politics' + language = 'es_UY' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'ISO-8859-1' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [ + dict(name='h1', attrs={'class':'nombre'}), + dict(name='h2', attrs={'class':'copete t20'}), + dict(name='div', attrs={'class':'desc'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['br', 'hr', 'titlebar', 'navigation']}), + dict(name='div', attrs={'id':'comment'}), + dict(name=['object','link']) + ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Nacionales', u'http://www.unoticias.com.uy/RSS/nacionales.xml'), + (u'Deportes', u'http://www.unoticias.com.uy/RSS/deportes.xml'), + (u'Sociedad', u'http://www.unoticias.com.uy/RSS/Sociedad.xml') + ] + + def get_cover_url(self): + return 'http://www.unoticias.com.uy/artworks/logos/logo_small.gif' + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup From a342162cffd5b78368d06807a7703e4f99755bbc Mon Sep 17 00:00:00 2001 From: Carlos Alves Date: Sun, 29 Sep 2013 17:34:45 -0300 Subject: [PATCH 06/88] Add padreydecano.recipe, update el_observador... MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add padreydecano.recipe (C.A. Peñarol related news) Update el_observador.recipe it was broken for a long time. 
--- recipes/el_observador.recipe | 38 +++++++++++++-------------- recipes/padreydecano.recipe | 50 ++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 19 deletions(-) create mode 100644 recipes/padreydecano.recipe diff --git a/recipes/el_observador.recipe b/recipes/el_observador.recipe index 994963671e..c82a1b7380 100644 --- a/recipes/el_observador.recipe +++ b/recipes/el_observador.recipe @@ -1,18 +1,23 @@ #!/usr/bin/env python +## +## Last Edited: 2013-09-29 Carlos Alves +## __license__ = 'GPL v3' __author__ = '2010, Yuri Alvarez' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' + ''' -observa.com.uy +elobservador.com.uy ''' from calibre.web.feeds.news import BasicNewsRecipe -class ObservaDigital(BasicNewsRecipe): - title = 'Observa Digital' - __author__ = 'yrvn' - description = 'Noticias de Uruguay' +class Noticias(BasicNewsRecipe): + title = 'El Observador' + __author__ = 'yrvn' + description = 'Noticias desde Uruguay' + tags = 'news, sports, entretainment' language = 'es_UY' timefmt = '[%a, %d %b, %Y]' use_embedded_content = False @@ -23,13 +28,18 @@ class ObservaDigital(BasicNewsRecipe): oldest_article = 2 max_articles_per_feed = 100 - keep_only_tags = [dict(id=['contenido'])] + keep_only_tags = [ + dict(name='div', attrs={'class':'story collapsed'}) + ] remove_tags = [ - dict(name='div', attrs={'id':'contenedorVinculadas'}), - dict(name='p', attrs={'id':'nota_firma'}), + dict(name='div', attrs={'class':['fecha', 'copyright', 'story_right']}), + dict(name='div', attrs={'class':['photo', 'social']}), + dict(name='div', attrs={'id':'widget'}), dict(name=['object','link']) ] + remove_attributes = ['width','height', 'style', 'font', 'color'] + extra_css = ''' h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} @@ -37,19 +47,9 @@ class ObservaDigital(BasicNewsRecipe): p {font-family:Arial,Helvetica,sans-serif;} ''' 
feeds = [ - (u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'), - (u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'), - (u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'), - (u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml') + (u'Portada', u'http://elobservador.com.uy/rss/portada/'), ] - def get_cover_url(self): - index = 'http://www.observa.com.uy/' - soup = self.index_to_soup(index) - for image in soup.findAll('img',alt=True): - if image['alt'].startswith('Tapa El Observador'): - return image['src'].rstrip('b.jpg') + '.jpg' - return None def preprocess_html(self, soup): for item in soup.findAll(style=True): diff --git a/recipes/padreydecano.recipe b/recipes/padreydecano.recipe new file mode 100644 index 0000000000..3e1cbf24f2 --- /dev/null +++ b/recipes/padreydecano.recipe @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2013, Carlos Alves ' +''' +padreydecano.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'Padre y Decano' + __author__ = 'Carlos Alves' + description = 'El sitio del pueblo' + tags = 'soccer, futbol, Peñarol' + language = 'es_UY' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = None + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [ + dict(name='h1', attrs={'class':'entry-title'}), + dict(name='div', attrs={'class':'entry-content clearfix'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['br', 'hr', 'titlebar', 'navigation']}), + dict(name='dl', attrs={'class':'gallery-item'}), + dict(name=['object','link']) + ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, 
sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Padre y Decano | Club Atlético Peñarol', u'http://www.padreydecano.com/cms/feed/') + ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup From e34d7bdee724b12c33aae76f7ee9626766e6bbb0 Mon Sep 17 00:00:00 2001 From: Charles Haley Date: Tue, 1 Oct 2013 19:17:25 +0200 Subject: [PATCH 07/88] Use much less memory when reading and writing cache --- .../devices/smart_device_app/driver.py | 90 ++++++++++++++----- 1 file changed, 70 insertions(+), 20 deletions(-) diff --git a/src/calibre/devices/smart_device_app/driver.py b/src/calibre/devices/smart_device_app/driver.py index 3be0878aa4..270353ffac 100644 --- a/src/calibre/devices/smart_device_app/driver.py +++ b/src/calibre/devices/smart_device_app/driver.py @@ -36,6 +36,7 @@ from calibre.library.server import server_config as content_server_config from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.ipc import eintr_retry_call from calibre.utils.config_base import tweaks +from calibre.utils.config import to_json, from_json from calibre.utils.filenames import ascii_filename as sanitize, shorten_components_to from calibre.utils.mdns import (publish as publish_zeroconf, unpublish as unpublish_zeroconf, get_all_ips) @@ -717,32 +718,75 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin): return None def _read_metadata_cache(self): +# cache_file_name = os.path.join(cache_dir(), +# 'device_drivers_' + self.__class__.__name__ + +# '_metadata_cache.pickle') +# if os.path.exists(cache_file_name): +# with open(cache_file_name, mode='rb') as fd: +# json_metadata = cPickle.load(fd) +# for uuid,json_book in json_metadata.iteritems(): +# book = self.json_codec.raw_to_book(json_book['book'], SDBook, self.PREFIX) +# self.known_uuids[uuid]['book'] = book +# self.known_uuids[uuid]['last_used'] = json_book['last_used'] +# lpath = book.get('lpath') +# if lpath in 
self.known_metadata: +# self.known_uuids.pop(uuid, None) +# else: +# self.known_metadata[lpath] = book + cache_file_name = os.path.join(cache_dir(), 'device_drivers_' + self.__class__.__name__ + - '_metadata_cache.pickle') - if os.path.exists(cache_file_name): - with open(cache_file_name, mode='rb') as fd: - json_metadata = cPickle.load(fd) - for uuid,json_book in json_metadata.iteritems(): - book = self.json_codec.raw_to_book(json_book['book'], SDBook, self.PREFIX) - self.known_uuids[uuid]['book'] = book - self.known_uuids[uuid]['last_used'] = json_book['last_used'] - lpath = book.get('lpath') - if lpath in self.known_metadata: - self.known_uuids.pop(uuid, None) - else: - self.known_metadata[lpath] = book + '_metadata_cache.json') + self.known_uuids = defaultdict(dict) + self.known_metadata = {} + with open(cache_file_name, mode='rb') as fd: + while True: + try: + rec_len = fd.readline() + if len(rec_len) != 8: + break + raw = fd.read(int(rec_len)) + book = json.loads(raw.decode('utf-8'), object_hook=from_json) + uuid = book.keys()[0] + metadata = self.json_codec.raw_to_book(book[uuid]['book'], + SDBook, self.PREFIX) + book[uuid]['book'] = metadata + self.known_uuids.update(book) + + lpath = metadata.get('lpath') + if lpath in self.known_metadata: + self.known_uuids.pop(uuid, None) + else: + self.known_metadata[lpath] = metadata + except: + traceback.print_exc() def _write_metadata_cache(self): +# cache_file_name = os.path.join(cache_dir(), +# 'device_drivers_' + self.__class__.__name__ + +# '_metadata_cache.pickle') +# json_metadata = defaultdict(dict) +# for uuid,book in self.known_uuids.iteritems(): +# json_metadata[uuid]['book'] = self.json_codec.encode_book_metadata(book['book']) +# json_metadata[uuid]['last_used'] = book['last_used'] +# with open(cache_file_name, mode='wb') as fd: +# cPickle.dump(json_metadata, fd, -1) + cache_file_name = os.path.join(cache_dir(), 'device_drivers_' + self.__class__.__name__ + - '_metadata_cache.pickle') - json_metadata = 
defaultdict(dict) - for uuid,book in self.known_uuids.iteritems(): - json_metadata[uuid]['book'] = self.json_codec.encode_book_metadata(book['book']) - json_metadata[uuid]['last_used'] = book['last_used'] + '_metadata_cache.json') with open(cache_file_name, mode='wb') as fd: - cPickle.dump(json_metadata, fd, -1) + try: + for uuid,book in self.known_uuids.iteritems(): + json_metadata = defaultdict(dict) + json_metadata[uuid]['book'] = self.json_codec.encode_book_metadata(book['book']) + json_metadata[uuid]['last_used'] = book['last_used'] + result = json.dumps(json_metadata, indent=2, default=to_json) + fd.write("%0.7d\n"%(len(result)+1)) + fd.write(result) + fd.write('\n') + except: + traceback.print_exc() def _set_known_metadata(self, book, remove=False): from calibre.utils.date import now @@ -757,7 +801,13 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin): if key: self.known_uuids.pop(key, None) else: - new_book = self.known_metadata[lpath] = book.deepcopy() + # Check if we have another UUID with the same lpath. If so, remove it + existing_uuid = self.known_metadata.get(lpath, {}).get('uuid', None) + if existing_uuid: + self.known_uuids.pop(existing_uuid + ext, None) + + new_book = book.deepcopy() + self.known_metadata[lpath] = new_book if key: self.known_uuids[key]['book'] = new_book self.known_uuids[key]['last_used'] = now() From e161df6fff64ec742bdf6f75533984f5aaf45eba Mon Sep 17 00:00:00 2001 From: Charles Haley Date: Tue, 1 Oct 2013 19:27:08 +0200 Subject: [PATCH 08/88] More cache work -- remove some code, get rid of old file. 
--- .../devices/smart_device_app/driver.py | 27 ++++++------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/src/calibre/devices/smart_device_app/driver.py b/src/calibre/devices/smart_device_app/driver.py index 270353ffac..0fd8fd23b1 100644 --- a/src/calibre/devices/smart_device_app/driver.py +++ b/src/calibre/devices/smart_device_app/driver.py @@ -687,10 +687,6 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin): key = uuid+ext if isinstance(lastmod, unicode): lastmod = parse_date(lastmod) -# if key in self.known_uuids: -# self._debug(key, lastmod, self.known_uuids[key].last_modified) -# else: -# self._debug(key, 'not in known uuids') if key in self.known_uuids and self.known_uuids[key]['book'].last_modified == lastmod: self.known_uuids[key]['last_used'] = now() return self.known_uuids[key]['book'].deepcopy() @@ -718,21 +714,14 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin): return None def _read_metadata_cache(self): -# cache_file_name = os.path.join(cache_dir(), -# 'device_drivers_' + self.__class__.__name__ + -# '_metadata_cache.pickle') -# if os.path.exists(cache_file_name): -# with open(cache_file_name, mode='rb') as fd: -# json_metadata = cPickle.load(fd) -# for uuid,json_book in json_metadata.iteritems(): -# book = self.json_codec.raw_to_book(json_book['book'], SDBook, self.PREFIX) -# self.known_uuids[uuid]['book'] = book -# self.known_uuids[uuid]['last_used'] = json_book['last_used'] -# lpath = book.get('lpath') -# if lpath in self.known_metadata: -# self.known_uuids.pop(uuid, None) -# else: -# self.known_metadata[lpath] = book + try: + old_cache_file_name = os.path.join(cache_dir(), + 'device_drivers_' + self.__class__.__name__ + + '_metadata_cache.pickle') + if os.path.exists(old_cache_file_name): + os.remove(old_cache_file_name) + except: + pass cache_file_name = os.path.join(cache_dir(), 'device_drivers_' + self.__class__.__name__ + From e96c33b0590452d7a281991105c827b3ae971d3c Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 29 Sep 2013 21:54:46 +0200 Subject: [PATCH 09/88] ebookpoint plugin: exclude audiobooks from list of items --- src/calibre/gui2/store/stores/ebookpoint_plugin.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/store/stores/ebookpoint_plugin.py b/src/calibre/gui2/store/stores/ebookpoint_plugin.py index 427cde6217..312f4baca8 100644 --- a/src/calibre/gui2/store/stores/ebookpoint_plugin.py +++ b/src/calibre/gui2/store/stores/ebookpoint_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 3 # Needed for dynamic plugin loading +store_version = 4 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011-2013, Tomasz Długosz ' @@ -60,13 +60,15 @@ class EbookpointStore(BasicStoreConfig, StorePlugin): if not id: continue + formats = ', '.join(data.xpath('.//div[@class="ikony"]/span/text()')) + if formats == 'MP3': + continue cover_url = ''.join(data.xpath('.//a[@class="cover"]/img/@src')) title = ''.join(data.xpath('.//h3/a/@title')) title = re.sub('eBook.', '', title) author = ''.join(data.xpath('.//p[@class="author"]//text()')) price = ''.join(data.xpath('.//p[@class="price"]/ins/text()')) - formats = ', '.join(data.xpath('.//div[@class="ikony"]/span/text()')) counter -= 1 From 563be2632200f94cebc95462c1bad4df38e073d5 Mon Sep 17 00:00:00 2001 From: Charles Haley Date: Wed, 2 Oct 2013 10:49:30 +0200 Subject: [PATCH 10/88] Make metadata cache reading slightly more robust --- src/calibre/devices/smart_device_app/driver.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/calibre/devices/smart_device_app/driver.py b/src/calibre/devices/smart_device_app/driver.py index 0fd8fd23b1..a0cc3086a8 100644 --- a/src/calibre/devices/smart_device_app/driver.py +++ b/src/calibre/devices/smart_device_app/driver.py @@ -729,8 +729,8 @@ class 
SMART_DEVICE_APP(DeviceConfig, DevicePlugin): self.known_uuids = defaultdict(dict) self.known_metadata = {} with open(cache_file_name, mode='rb') as fd: - while True: - try: + try: + while True: rec_len = fd.readline() if len(rec_len) != 8: break @@ -747,20 +747,10 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin): self.known_uuids.pop(uuid, None) else: self.known_metadata[lpath] = metadata - except: - traceback.print_exc() + except: + traceback.print_exc() def _write_metadata_cache(self): -# cache_file_name = os.path.join(cache_dir(), -# 'device_drivers_' + self.__class__.__name__ + -# '_metadata_cache.pickle') -# json_metadata = defaultdict(dict) -# for uuid,book in self.known_uuids.iteritems(): -# json_metadata[uuid]['book'] = self.json_codec.encode_book_metadata(book['book']) -# json_metadata[uuid]['last_used'] = book['last_used'] -# with open(cache_file_name, mode='wb') as fd: -# cPickle.dump(json_metadata, fd, -1) - cache_file_name = os.path.join(cache_dir(), 'device_drivers_' + self.__class__.__name__ + '_metadata_cache.json') From d868b2d54154d9d101659ad6b6044efee1b5c692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Wed, 2 Oct 2013 19:58:11 +0200 Subject: [PATCH 11/88] remove 'The H' recipe Unfortunatelly they're not publishing any more: http://www.h-online.com/open/news/item/The-H-is-closing-down-1920027.html --- recipes/the_h.recipe | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 recipes/the_h.recipe diff --git a/recipes/the_h.recipe b/recipes/the_h.recipe deleted file mode 100644 index 28a1571dc5..0000000000 --- a/recipes/the_h.recipe +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2010, Hiroshi Miura ' -''' -www.h-online.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class TheHeiseOnline(BasicNewsRecipe): - title = u'The H' - __author__ = 'Hiroshi Miura' - oldest_article = 3 - description = 'In association 
with Heise Online' - publisher = 'Heise Media UK Ltd.' - category = 'news, technology, security, OSS, internet' - max_articles_per_feed = 100 - language = 'en' - encoding = 'utf-8' - conversion_options = { - 'comment' : description - ,'tags' : category - ,'publisher': publisher - ,'language' : language - } - feeds = [ - (u'The H News Feed', u'http://www.h-online.com/news/atom.xml') - ] - cover_url = 'http://www.h-online.com/icons/logo_theH.gif' - - remove_tags = [ - dict(id="logo"), - dict(id="footer") - ] - - def print_version(self, url): - return url + '?view=print' - From e9f5f8ea2ac5cc9a23513ac632d262bae56ba1c0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 3 Oct 2013 07:23:01 +0530 Subject: [PATCH 12/88] Fix virtual libraries template function The virtual libraries template function was overriding the cache used for format_metadata in the ProxyMetadata object. --- src/calibre/db/lazy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/db/lazy.py b/src/calibre/db/lazy.py index 73260c8ae7..8935110b65 100644 --- a/src/calibre/db/lazy.py +++ b/src/calibre/db/lazy.py @@ -234,11 +234,11 @@ def composite_getter(mi, field, metadata, book_id, cache, formatter, template_ca def virtual_libraries_getter(dbref, book_id, cache): try: - return cache[field] + return cache['virtual_libraries'] except KeyError: db = dbref() vls = db.virtual_libraries_for_books((book_id,))[book_id] - ret = cache[field] = ', '.join(vls) + ret = cache['virtual_libraries'] = ', '.join(vls) return ret getters = { From ea9a2dfd8ff44285e867018b470276846f2f2b37 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 3 Oct 2013 09:20:03 +0530 Subject: [PATCH 13/88] Update Neu Osnabrucker Zeitung --- recipes/neu_osnabrucker_zeitung.recipe | 139 ++++++++++++++++++------- 1 file changed, 99 insertions(+), 40 deletions(-) diff --git a/recipes/neu_osnabrucker_zeitung.recipe b/recipes/neu_osnabrucker_zeitung.recipe index dc6a36237e..011d97f801 100644 --- 
a/recipes/neu_osnabrucker_zeitung.recipe +++ b/recipes/neu_osnabrucker_zeitung.recipe @@ -1,49 +1,108 @@ # vim:fileencoding=utf-8 - from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1344926684(BasicNewsRecipe): - title = u'Neue Osnabrücker Zeitung' - __author__ = 'Krittika Goyal' - oldest_article = 7 - max_articles_per_feed = 100 - # auto_cleanup = True - no_stylesheets = True - use_embedded_content = False - language = 'de' +class AdvancedUserRecipe1380105782(BasicNewsRecipe): + title = u'Neue Osnabrücker Zeitung' + __author__ = 'vo_he' + description = 'Online auch ohne IPhone' + encoding = 'utf-8' + language = 'de' remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + cover_url = 'http://www.noz.de/bundles/nozplatform/images/logos/osnabruecker-zeitung.png' + + remove_tags_before =dict(id='feedContent') + remove_tags_before =dict(id='headline') + + remove_tags_after =dict(id='article-authorbox') + remove_tags_after =dict(id='footer-start') + remove_tags_after =dict(name='div', attrs={'class':'morelinks'}) - keep_only_tags = [ - dict(name='div', attrs={'class':'article'}), - dict(name='span', attrs={'id':'articletext'}) - ] remove_tags = [ - dict(name='div', attrs={'id':'retresco-title'}), - dict(name='div', attrs={'class':'retresco-item s1 relative'}), - dict(name='a', attrs={'class':'medium2 largeSpaceTop icon'}), - dict(name='div', attrs={'class':'articleFunctions inlineTeaserRight'}), - dict(name='div', attrs={'class':'imageContainer '}), - dict(name='div', attrs={'class':'imageContainer centerContainer'}), - dict(name='div', attrs={'class':'grid singleCol articleTeaser'}), - dict(name='h3', attrs={'class':'teaserRow'}), - dict(name='div', attrs={'class':'related-comments'}), - dict(name='a', attrs={'class':' icon'}), - dict(name='a', attrs={'class':'right small'}), - dict(name='span', attrs={'class':'small block spaceBottom rectangleAd'}), + dict(name='div', 
attrs={'id':'ui-datepicker-div'}), + dict(name='div', attrs={'class':'nav-second'}), + dict(name='div', attrs={'class':'nav-first'}), + dict(name='div', attrs={'class':'icon-print'}), + dict(name='div', attrs={'class':'social-button'}), + dict(name='div', attrs={'class':'social-media-bar'}), + dict(name='div', attrs={'class':'pull-right'}), + dict(name='div', attrs={'class':'btn btn-primary flat-button'}), + dict(name='div', attrs={'class':'carousel-wrapper'}), + dict(name='a', attrs={'class':'right-content merchandising hidden-tablet'}), + dict(name='div', attrs={'class':'border-circle pull-left'}), + dict(name='div', attrs={'class':'row show-grid general-infoimageContainer '}), + dict(name='div', attrs={'class':'location-list'}), + dict(name='div', attrs={'class':'block'}), dict(name='div', attrs={'class':'furtherGalleries largeSpaceTop'}) ] - feeds = [(u'Lokales', u'http://www.noz.de/rss/Lokales'), -(u'Vermischtes', u'http://www.noz.de/rss/Vermischtes'), -(u'Politik', u'http://www.noz.de/rss/Politik'), -(u'Wirtschaft', u'http://www.noz.de/rss/Wirtschaft'), -(u'Kultur', u'http://www.noz.de/rss/Kultur'), -(u'Medien', u'http://www.noz.de/rss/Medien'), -(u'Wissenschaft', u'http://www.noz.de/rss/wissenschaft'), -(u'Sport', u'http://www.noz.de/rss/Sport'), -(u'Computer', u'http://www.noz.de/rss/Computer'), -(u'Musik', u'http://www.noz.de/rss/Musik'), -(u'Szene', u'http://www.noz.de/rss/Szene'), -(u'Niedersachsen', u'http://www.noz.de/rss/Niedersachsen'), -(u'Kino', u'http://www.noz.de/rss/Kino')] - + feeds = [(u'Melle Mitte', u'http://www.noz.de/rss/ressort/Melle%20Mitte'), + (u'Melle Nord', u'http://www.noz.de/rss/ressort/Melle%20Nord'), + (u'Melle Sued', u'http://www.noz.de/rss/ressort/Melle%20S%C3%BCd'), + (u'Nordrhein Westfalen', u'http://www.noz.de/rss/ressort/Nordrhein-Westfalen'), + (u'Niedersachsen', u'http://www.noz.de/rss/ressort/Niedersachsen'), + (u'Vermischtes', u'http://www.noz.de/rss/ressort/Vermischtes'), + (u'GutzuWissen', 
u'http://www.noz.de/rss/ressort/Gut%20zu%20Wissen'), + (u'Sport', u'http://www.noz.de/rss/ressort/Sport'), + (u'Kultur', u'http://www.noz.de/rss/ressort/Kultur'), + (u'Medien', u'http://www.noz.de/rss/ressort/Medien'), + (u'Belm', u'http://www.noz.de/rss/ressort/Belm'), + (u'Bissendorf', u' [url]http://www.noz.de/rss/ressort/Bissendorf[/url]'), + (u'Osnabrueck', u'http://www.noz.de/rss/ressort/Osnabr%C3%BCck'), + (u'Bad Essen', u'http://www.noz.de/rss/ressort/Bad%20Essen'), + (u'Politik', u'http://www.noz.de/rss/ressort/Politik'), + (u'Wirtschaft', u'http://www.noz.de/rss/ressort/Wirtschaft'), +#(u'Fussball', u'http:/www.noz.de/rss/ressort/Fußball'), +#(u'VfL Osnabrueck', u'http://www.noz.de/rss/ressort/VfL%20Osnabr%C3%BCck'), +#(u'SF Lotte', u'http://www.noz.de/rss/ressort/SF%20Lotte'), +#(u'SV Meppen', u'http://www.noz.de/rss/ressort/SV%20Meppen'), +#(u'Artland Dragons', u'http://www.noz.de/rss/ressort/Artland%20Dragons'), +#(u'Panthers', u'http://www.noz.de/rss/ressort/Panthers'), +(u'OS-Sport', u'http://www.noz.de/rss/ressort/OS-Sport'), +#(u'Emsland Sport', u'http://www.noz.de/rss/ressort/EL-Sport'), +#(u'Lingen', u'http://www.noz.de/rss/ressort/Lingen'), +#(u'Lohne', u'http://www.noz.de/rss/ressort/Lohne'), +#(u'Emsbueren', u'http://www.noz.de/rss/ressort/Emsb%C3%BCren'), +#(u'Salzbergen', u'http://www.noz.de/rss/ressort/Salzbergen'), +#(u'Spelle', u'http://www.noz.de/rss/ressort/Spelle'), +#(u'Freren', u'http://www.noz.de/rss/ressort/Freren'), +#(u'Lengerich', u'http://www.noz.de/rss/ressort/Lengerich'), +#(u'Bad Iburg', u'http://www.noz.de/rss/ressort/Bad%20Iburg'), +#(u'Bad Laer', u'http://www.noz.de/rss/ressort/Bad%20Laer'), +#(u'Bad Rothenfelde', u'http://www.noz.de/rss/ressort/Bad%20Rothenfelde'), +#(u'GMHütte', u'http://www.noz.de/rss/ressort/Georgsmarienh%C3%BCtte'), +#(u'Glandorf', u'http://www.noz.de/rss/ressort/Glandorf'), +#(u'Hagen', u'http://www.noz.de/rss/ressort/Hagen'), +#(u'Hasbergen', u'http://www.noz.de/rss/ressort/Hasbergen'), 
+#(u'Hilter', u'http://www.noz.de/rss/ressort/Hilter'), +#(u'Lotte', u'http://www.noz.de/rss/ressort/Lotte'), +#(u'Wallenhorst', u'http://www.noz.de/rss/ressort/Wallenhorst'), +#(u'Westerkappeln', u'http://www.noz.de/rss/ressort/Westerkappeln'), +#(u'Artland', u'http://www.noz.de/rss/ressort/Artland'), +#(u'Bersenbrück', u'http://www.noz.de/rss/ressort/Bersenbr%C3%BCck'), +#(u'Fürstenau', u'http://www.noz.de/rss/ressort/F%C3%BCrstenau'), +#(u'Neuenkirchen', u'http://www.noz.de/rss/ressort/Neuenkirchen'), +#(u'Lokalsport', u'http://www.noz.de/rss/ressort/Lokalsport%20Nordkreis'), +#(u'Bramsche', u'http://www.noz.de/rss/ressort/Bramsche'), +#(u'Bramsche Ortsteile', u'http://www.noz.de/rss/ressort/Bramscher%20Ortsteile'), +#(u'Neuenkirchen Vörden', u'http://www.noz.de/rss/ressort/Neuenkirchen-V%C3%B6rden'), +#(u'Papenburg', u'http://www.noz.de/rss/ressort/Papenburg'), +#(u'Dörpen', u'http://www.noz.de/rss/ressort/D%C3%B6rpen'), +#(u'Rhede', u'http://www.noz.de/rss/ressort/Rhede'), +#(u'Lathen', u'http://www.noz.de/rss/ressort/Lathen'), +#(u'Sögel', u'http://www.noz.de/rss/ressort/S%C3%B6gel'), +#(u'Nordhümmling', u'http://www.noz.de/rss/ressort/Nordh%C3%BCmmling'), +#(u'Werlte', u'http://www.noz.de/rss/ressort/Werlte'), +#(u'Westoverledingen', u'http://www.noz.de/rss/ressort/Westoverledingen'), +#(u'Geeste', u'http://www.noz.de/rss/ressort/Geeste'), +#(u'Haren', u'http://www.noz.de/rss/ressort/Haren'), +#(u'Haselünne', u'http://www.noz.de/rss/ressort/Hasel%C3%BCnne'), +#(u'Herzlake', u'http://www.noz.de/rss/ressort/Herzlake'), +#(u'Meppen', u'http://www.noz.de/rss/ressort/Meppen'), +#(u'Twist', u'http://www.noz.de/rss/ressort/Twist'), +#(u'Bohmte', u'http://www.noz.de/rss/ressort/Bohmte'), +#(u'Ostercappeln', u'http://www.noz.de/rss/ressort/Ostercappeln') +] From fd77ad2c925de73b25971a99ddf6348b7a53db0e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 3 Oct 2013 09:28:55 +0530 Subject: [PATCH 14/88] Update AM 730 and Ming Pao (HK) --- recipes/am730.recipe | 79 
+++++++++++++++-------------------------- recipes/ming_pao.recipe | 7 ++-- 2 files changed, 33 insertions(+), 53 deletions(-) diff --git a/recipes/am730.recipe b/recipes/am730.recipe index 0fac4bea51..925a244362 100644 --- a/recipes/am730.recipe +++ b/recipes/am730.recipe @@ -3,10 +3,10 @@ from __future__ import unicode_literals __license__ = 'GPL v3' __copyright__ = '2013, Eddie Lau' __Date__ = '' -__HiResImg__ = True ''' Change Log: +2013/09/28 -- update due to website redesign, add cover 2013/03/30 -- first version ''' @@ -32,18 +32,17 @@ class AppleDaily(BasicNewsRecipe): encoding = 'utf-8' auto_cleanup = False remove_javascript = True - use_embedded_content = False + use_embedded_content = False no_stylesheets = True description = 'http://www.am730.com.hk' category = 'Chinese, News, Hong Kong' masthead_url = 'http://www.am730.com.hk/images/logo.jpg' - - extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}' - keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}), - dict(name='div', attrs={'class':'thecontent wordsnap'}), - dict(name='a', attrs={'class':'lightboximg'})] - remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}), - dict(name='img', attrs={'src':'/images/am_endmark.gif'})] + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' + keep_only_tags = [dict(name='h2', attrs={'class':'printTopic'}), + dict(name='div', attrs={'id':'article_content'}), + dict(name='div', attrs={'id':'slider'})] + remove_tags = [dict(name='img', attrs={'src':'images/am730_article_logo.jpg'}), + dict(name='img', 
attrs={'src':'images/am_endmark.gif'})] def get_dtlocal(self): dt_utc = datetime.datetime.utcnow() @@ -84,6 +83,16 @@ class AppleDaily(BasicNewsRecipe): def get_weekday(self): return self.get_dtlocal().weekday() + def get_cover_url(self): + soup = self.index_to_soup('http://www.am730.com.hk') + cover = 'http://www.am730.com.hk/' + soup.find(attrs={'id':'mini_news_img'}).find('img').get('src', False) + br = BasicNewsRecipe.get_browser(self) + try: + br.open(cover) + except: + cover = None + return cover + def populate_article_metadata(self, article, soup, first): if first and hasattr(self, 'add_toc_thumbnail'): picdiv = soup.find('img') @@ -93,48 +102,17 @@ class AppleDaily(BasicNewsRecipe): def parse_index(self): feeds = [] soup = self.index_to_soup('http://www.am730.com.hk/') - ul = soup.find(attrs={'class':'nav-section'}) - sectionList = [] - for li in ul.findAll('li'): - a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False) - title = li.find('a').get('title', False).strip() - sectionList.append((title, a)) - for title, url in sectionList: - articles = self.parse_section(url) - if articles: - feeds.append((title, articles)) + optgroups = soup.findAll('optgroup') + for optgroup in optgroups: + sectitle = optgroup.get('label') + articles = [] + for option in optgroup.findAll('option'): + articlelink = "http://www.am730.com.hk/" + option.get('value') + title = option.string + articles.append({'title': title, 'url': articlelink}) + feeds.append((sectitle, articles)) return feeds - - def parse_section(self, url): - soup = self.index_to_soup(url) - items = soup.findAll(attrs={'style':'padding-bottom: 15px;'}) - current_articles = [] - for item in items: - a = item.find(attrs={'class':'t6 f14'}).find('a', href=True) - articlelink = 'http://www.am730.com.hk/' + a.get('href', True) - title = self.tag_to_string(a) - description = self.tag_to_string(item.find(attrs={'class':'t3 f14'})) - current_articles.append({'title': title, 'url': articlelink, 
'description': description}) - return current_articles - - def preprocess_html(self, soup): - multia = soup.findAll('a') - for a in multia: - if not (a == None): - image = a.find('img') - if not (image == None): - if __HiResImg__: - image['src'] = image.get('src').replace('/thumbs/', '/') - caption = image.get('alt') - tag = Tag(soup, "photo", []) - tag2 = Tag(soup, "photocaption", []) - tag.insert(0, image) - if not caption == None: - tag2.insert(0, caption) - tag.insert(1, tag2) - a.replaceWith(tag) - return soup - + def create_opf(self, feeds, dir=None): if dir is None: dir = self.output_dir @@ -288,3 +266,4 @@ class AppleDaily(BasicNewsRecipe): with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): opf.render(opf_file, ncx_file) + diff --git a/recipes/ming_pao.recipe b/recipes/ming_pao.recipe index a655d598e4..dffbe27f89 100644 --- a/recipes/ming_pao.recipe +++ b/recipes/ming_pao.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2010-2011, Eddie Lau' +__copyright__ = '2010-2013, Eddie Lau' # Region - Hong Kong, Vancouver, Toronto __Region__ = 'Hong Kong' @@ -32,6 +32,7 @@ __Date__ = '' ''' Change Log: +2013/09/28: allow thumbnails even with hi-res images 2012/04/24: improved parsing of news.mingpao.com content 2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. 
Start new day @@ -846,8 +847,7 @@ class MPRecipe(BasicNewsRecipe): return soup def populate_article_metadata(self, article, soup, first): - # thumbnails shouldn't be available if using hi-res images - if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'): + if __IncludeThumbnails__ and first and hasattr(self, 'add_toc_thumbnail'): img = soup.find('img') if img is not None: self.add_toc_thumbnail(article, img['src']) @@ -1071,3 +1071,4 @@ class MPRecipe(BasicNewsRecipe): + From 5ca115eab0e3d895ef2e6e8e4586c276909226f7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 3 Oct 2013 10:08:37 +0530 Subject: [PATCH 15/88] calibredb: Allow setting of title sort field Fixes #1233711 [CalibreDB: title_sort is not a known field](https://bugs.launchpad.net/calibre/+bug/1233711) --- src/calibre/library/cli.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index 89989102e2..f0760e7439 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -629,9 +629,12 @@ def command_set_metadata(args, dbpath): if opts.field: fields = {k:v for k, v in fields()} + fields['title_sort'] = fields['sort'] vals = {} for x in opts.field: field, val = x.partition(':')[::2] + if field == 'sort': + field = 'title_sort' if field not in fields: print >>sys.stderr, _('%s is not a known field'%field) return 1 From 9dd410ed70fddda4d2871217ec3dfc681703feee Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 3 Oct 2013 10:28:06 +0530 Subject: [PATCH 16/88] DOCX Input: Handle numbering without character styles DOCX Input: Fix error when converting docx files that have numbering defined with no associated character style.
Fixes #1232100 [Private bug](https://bugs.launchpad.net/calibre/+bug/1232100) --- src/calibre/ebooks/docx/numbering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/docx/numbering.py b/src/calibre/ebooks/docx/numbering.py index 602689a8cd..740674d0dd 100644 --- a/src/calibre/ebooks/docx/numbering.py +++ b/src/calibre/ebooks/docx/numbering.py @@ -90,7 +90,7 @@ class Level(object): self.is_numbered = False cs = self.character_style if lt in {'\uf0a7', 'o'} or ( - cs.font_family is not inherit and cs.font_family.lower() in {'wingdings', 'symbol'}): + cs is not None and cs.font_family is not inherit and cs.font_family.lower() in {'wingdings', 'symbol'}): self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(lt, 'disc') else: self.bullet_template = lt From d4d98e3f2a5a02995c040dc466cbfb4efd62fe71 Mon Sep 17 00:00:00 2001 From: David Forrester Date: Thu, 3 Oct 2013 15:20:32 +1000 Subject: [PATCH 17/88] Display version on Kobo unsupported firmware message When calibre shows the message that says the Kobo device is not supported, the firmware version and database version on the device is not displayed. Adding this to make support a little easier. --- src/calibre/devices/kobo/driver.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index e8985b1ff9..1f41e583bb 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -63,7 +63,7 @@ class KOBO(USBMS): gui_name = 'Kobo Reader' description = _('Communicate with the Kobo Reader') author = 'Timothy Legge and David Forrester' - version = (2, 1, 3) + version = (2, 1, 4) dbversion = 0 fwversion = 0 @@ -661,6 +661,8 @@ class KOBO(USBMS): ' "Attempt to support newer firmware" option.' ' Doing so may require you to perform a factory reset of' ' your Kobo.' + '\nDevice database version: %s.' 
+ '\nDevice firmware version: %s' % (self.dbversion, self.fwversion) ), UserFeedback.WARN) @@ -2834,6 +2836,8 @@ class KOBOTOUCH(KOBO): ' "Attempt to support newer firmware" option.' ' Doing so may require you to perform a factory reset of' ' your Kobo.' + '\nDevice database version: %s.' + '\nDevice firmware version: %s' % (self.dbversion, self.fwversion) ), UserFeedback.WARN) From d445cb3b6b74ff1c2afed6b4bb8e91bdf6bd2260 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 3 Oct 2013 11:19:33 +0530 Subject: [PATCH 18/88] ... --- src/calibre/devices/kobo/driver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 1f41e583bb..1410145811 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -661,7 +661,7 @@ class KOBO(USBMS): ' "Attempt to support newer firmware" option.' ' Doing so may require you to perform a factory reset of' ' your Kobo.' - '\nDevice database version: %s.' + '\nDevice database version: %s.' '\nDevice firmware version: %s' % (self.dbversion, self.fwversion) ), UserFeedback.WARN) @@ -2836,7 +2836,7 @@ class KOBOTOUCH(KOBO): ' "Attempt to support newer firmware" option.' ' Doing so may require you to perform a factory reset of' ' your Kobo.' - '\nDevice database version: %s.' + '\nDevice database version: %s.' '\nDevice firmware version: %s' % (self.dbversion, self.fwversion) ), UserFeedback.WARN) From c0c64d041919bb0884c445de5bdc323f3580b6c3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 3 Oct 2013 11:25:11 +0530 Subject: [PATCH 19/88] ... 
--- src/calibre/devices/kobo/driver.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 1410145811..11981cbcd6 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -660,11 +660,10 @@ class KOBO(USBMS): ' selecting "Configure this device" and then the ' ' "Attempt to support newer firmware" option.' ' Doing so may require you to perform a factory reset of' - ' your Kobo.' + ' your Kobo.') + (( '\nDevice database version: %s.' - '\nDevice firmware version: %s' % (self.dbversion, self.fwversion) - ), - UserFeedback.WARN) + '\nDevice firmware version: %s') % (self.dbversion, self.fwversion)) + , UserFeedback.WARN) return False else: From 6b74d5078e656e041d89927b19e2bc6ec4f39454 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 4 Oct 2013 08:33:03 +0530 Subject: [PATCH 20/88] Update Carta Capital Fixes #1235052 [News "Carta Capital" is not working](https://bugs.launchpad.net/calibre/+bug/1235052) --- recipes/carta_capital.recipe | 42 ++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/recipes/carta_capital.recipe b/recipes/carta_capital.recipe index 8bd21046b1..ba13856e16 100644 --- a/recipes/carta_capital.recipe +++ b/recipes/carta_capital.recipe @@ -1,23 +1,29 @@ +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1312361378(BasicNewsRecipe): - title = u'Carta capital' - __author__ = 'Pablo Aldama' +class AdvancedUserRecipe1380852962(BasicNewsRecipe): + title = u'Carta Capital' + __author__ = 'Erico Lisboa' language = 'pt_BR' - oldest_article = 9 + oldest_article = 15 max_articles_per_feed = 100 + auto_cleanup = True + use_embedded_content = False - feeds = [(u'Politica', 
u'http://www.cartacapital.com.br/category/politica/feed') - ,(u'Economia', u'http://www.cartacapital.com.br/category/economia/feed') - ,(u'Cultura', u'http://www.cartacapital.com.br/category/cultura/feed') - ,(u'Internacional', u'http://www.cartacapital.com.br/category/internacional/feed') - ,(u'Saude', u'http://www.cartacapital.com.br/category/saude/feed') - ,(u'Sociedade', u'http://www.cartacapital.com.br/category/sociedade/feed') - ,(u'Tecnologia', u'http://www.cartacapital.com.br/category/tecnologia/feed') - ,(u'Carta na escola', u'http://www.cartacapital.com.br/category/carta-na-escola/feed') - ,(u'Carta fundamental', u'http://www.cartacapital.com.br/category/carta-fundamental/feed') - ,(u'Carta verde', u'http://www.cartacapital.com.br/category/carta-verde/feed') - -] - def print_version(self, url): - return url + '/print' + feeds = [(u'Pol\xedtica', +u'http://www.cartacapital.com.br/politica/politica/rss'), (u'Economia', +u'http://www.cartacapital.com.br/economia/economia/atom.xml'), +(u'Sociedade', +u'http://www.cartacapital.com.br/sociedade/sociedade/atom.xml'), +(u'Internacional', +u'http://www.cartacapital.com.br/internacional/internacional/atom.xml'), +(u'Tecnologia', +u'http://www.cartacapital.com.br/tecnologia/tecnologia/atom.xml'), +(u'Cultura', +u'http://www.cartacapital.com.br/cultura/cultura/atom.xml'), +(u'Sa\xfade', u'http://www.cartacapital.com.br/saude/saude/atom.xml'), +(u'Educa\xe7\xe3o', +u'http://www.cartacapital.com.br/educacao/educacao/atom.xml')] From 038a9d99dc4020781c1779ce2903c84c59a734e0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 4 Oct 2013 08:52:54 +0530 Subject: [PATCH 21/88] HTML Input: Handle the poster attribute of the