From 9e660527874eda0d480a7887ed332cdc7fb8297f Mon Sep 17 00:00:00 2001 From: Timothy Legge Date: Mon, 10 Oct 2011 18:38:35 -0300 Subject: [PATCH 01/33] Add support for adding kepubs to the library --- src/calibre/devices/kobo/driver.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index cdb9e0c4c9..0e11302ec3 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -5,7 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Timothy Legge and Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os +import os, shutil import sqlite3 as sqlite from contextlib import closing from calibre.devices.usbms.books import BookList @@ -16,6 +16,7 @@ from calibre.devices.usbms.driver import USBMS, debug_print from calibre import prints from calibre.devices.usbms.books import CollectionsBookList from calibre.utils.magick.draw import save_cover_data_to +from calibre.ptempfile import PersistentTemporaryFile class KOBO(USBMS): @@ -865,3 +866,21 @@ class KOBO(USBMS): else: debug_print("ImageID could not be retreived from the database") + def prepare_addable_books(self, paths): + ''' + The Kobo supports an encrypted epub refered to as a kepub + Unfortunately Kobo decided to put the files on the device + with no file extension. I just hope that decision causes + them as much grief as it does me :-) + + This has to make a temporary copy of the book files with a + epub extension to allow Calibre's normal processing to + deal with the file appropriately + ''' + for idx, path in enumerate(paths): + if path.find('kepub') >= 0: + with closing(open(path)) as r: + tf = PersistentTemporaryFile(suffix='.epub') + tf.write(r.read()) + paths[idx] = tf.name + return paths From 8bc521d0451c4eece043caa19e66683f9521fbe8 Mon Sep 17 00:00:00 2001 From: Sengian Date: Sat, 15 Oct 2011 21:07:43 +0200 Subject: [PATCH 02/33] RTF cleanup --- src/calibre/ebooks/rtf2xml/add_brackets.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/rtf2xml/add_brackets.py b/src/calibre/ebooks/rtf2xml/add_brackets.py index a7888f32b8..d4919d4cd7 100755 --- a/src/calibre/ebooks/rtf2xml/add_brackets.py +++ b/src/calibre/ebooks/rtf2xml/add_brackets.py @@ -11,11 +11,11 @@ # # # # ######################################################################### -import sys, os, tempfile +import sys, os, tempfile + from calibre.ebooks.rtf2xml import copy, check_brackets # note to self. This is the first module in which I use tempfile. A good idea? -""" -""" + class AddBrackets: """ Add brackets for old RTF. @@ -41,6 +41,7 @@ class AddBrackets: self.__copy = copy self.__write_to = tempfile.mktemp() self.__run_level = run_level + def __initiate_values(self): """ """ @@ -82,14 +83,16 @@ class AddBrackets: 'cw Date: Sun, 16 Oct 2011 11:23:56 +0200 Subject: [PATCH 03/33] Fix Liberation news website recipe --- recipes/liberation.recipe | 85 +++++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 26 deletions(-) diff --git a/recipes/liberation.recipe b/recipes/liberation.recipe index 20f12b2d57..7183e26909 100644 --- a/recipes/liberation.recipe +++ b/recipes/liberation.recipe @@ -9,39 +9,72 @@ liberation.fr from calibre.web.feeds.news import BasicNewsRecipe class Liberation(BasicNewsRecipe): + title = u'Liberation' - __author__ = 'Darko Miletic' - description = 'News from France' - language = 'fr' + __author__ = 'calibre' + description = 'Actualités' + category = 'Actualités, France, Monde' + language = 'fr' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False + use_embedded_content = False + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 15 + no_stylesheets = True + remove_empty_feeds = True + filterDuplicates = True - html2lrf_options = ['--base-font-size', '10'] + extra_css = ''' + h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;} + p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;} + h4, h5, h2.rubrique, {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + .ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + .mna-body, entry-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;} + ''' keep_only_tags = [ - dict(name='h1') - #,dict(name='div', attrs={'class':'object-content text text-item'}) - ,dict(name='div', attrs={'class':'article'}) - #,dict(name='div', attrs={'class':'articleContent'}) - ,dict(name='div', attrs={'class':'entry'}) - ] - remove_tags_after = [ dict(name='div',attrs={'class':'toolbox extra_toolbox'}) ] + dict(name='div', attrs={'class':'article'}) + ,dict(name='div', attrs={'class':'text-article m-bot-s1'}) + ,dict(name='div', attrs={'class':'entry'}) + ,dict(name='div', attrs={'class':'col_contenu'}) + ] + + remove_tags_after = [ + dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']}) + ,dict(name='p',attrs={'class':['chapo']}) + ,dict(id='_twitter_facebook') + ] + remove_tags = [ - dict(name='p', attrs={'class':'clear'}) - ,dict(name='ul', attrs={'class':'floatLeft clear'}) - ,dict(name='div', attrs={'class':'clear floatRight'}) - ,dict(name='object') - ,dict(name='div', attrs={'class':'toolbox'}) - ,dict(name='div', attrs={'class':'cartridge cartridge-basic-bubble cat-zoneabo'}) - #,dict(name='div', attrs={'class':'clear block block-call-items'}) - ,dict(name='div', attrs={'class':'block-content'}) + dict(name='iframe') + ,dict(name='a', attrs={'class':'lnk-comments'}) + ,dict(name='div', attrs={'class':'toolbox'}) + ,dict(name='ul', attrs={'class':'share-box'}) + ,dict(name='ul', attrs={'class':'tool-box'}) + ,dict(name='ul', attrs={'class':'rub'}) + ,dict(name='p',attrs={'class':['chapo']}) + ,dict(name='p',attrs={'class':['tag']}) + ,dict(name='div',attrs={'class':['blokLies']}) + ,dict(name='div',attrs={'class':['alire']}) + ,dict(id='_twitter_facebook') ] feeds = [ - (u'La une', u'http://www.liberation.fr/rss/laune') - ,(u'Monde' , u'http://www.liberation.fr/rss/monde') - ,(u'Sports', u'http://www.liberation.fr/rss/sports') + (u'La une', u'http://rss.liberation.fr/rss/9/') + ,(u'Monde' , u'http://www.liberation.fr/rss/10/') + ,(u'Économie', u'http://www.liberation.fr/rss/13/') + ,(u'Politiques', u'http://www.liberation.fr/rss/11/') + ,(u'Société', u'http://www.liberation.fr/rss/12/') + ,(u'Cinéma', u'http://www.liberation.fr/rss/58/') + ,(u'Écran', u'http://www.liberation.fr/rss/53/') + ,(u'Sports', u'http://www.liberation.fr/rss/12/') ] + + def get_masthead_url(self): + masthead = 'http://s0.libe.com/libe/img/common/logo-liberation-150.png' + br = BasicNewsRecipe.get_browser() + try: + br.open(masthead) + except: + self.log("\nCover unavailable") + masthead = None + return masthead From 1bb39ffd77b8371411968c43ceb3669479071822 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Chabot?= Date: Sun, 16 Oct 2011 11:24:15 +0200 Subject: [PATCH 04/33] Fix USA Today news website recipe --- recipes/usatoday.recipe | 56 +++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/recipes/usatoday.recipe b/recipes/usatoday.recipe index 18aeab2648..62c5f1c2da 100644 --- a/recipes/usatoday.recipe +++ b/recipes/usatoday.recipe @@ -10,27 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe class USAToday(BasicNewsRecipe): - title = 'USA Today' - __author__ = 'Kovid Goyal' - oldest_article = 1 - publication_type = 'newspaper' - timefmt = '' - max_articles_per_feed = 20 - language = 'en' - no_stylesheets = True - extra_css = '.headline {text-align: left;}\n \ - .byline {font-family: monospace; \ - text-align: left; \ - margin-bottom: 1em;}\n \ - .image {text-align: center;}\n \ - .caption {text-align: center; \ - font-size: smaller; \ - font-style: italic}\n \ - .credit {text-align: right; \ - margin-bottom: 0em; \ - font-size: smaller;}\n \ - .articleBody {text-align: left;}\n ' - #simultaneous_downloads = 1 + title = 'USA Today' + __author__ = 'calibre' + description = 'newspaper' + encoding = 'utf-8' + publisher = 'usatoday.com' + category = 'news, usa' + language = 'en' + + use_embedded_content = False + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 15 + no_stylesheets = True + remove_empty_feeds = True + filterDuplicates = True + + extra_css = ''' + h1, h2 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;} + #post-attributes, .info, .clear {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + #post-body, #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;} + ''' + + feeds = [ ('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'), ('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'), @@ -43,15 +44,18 @@ class USAToday(BasicNewsRecipe): ('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'), ('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'), ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'), - ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'), + ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories') ] + keep_only_tags = [dict(attrs={'class':'story'})] + remove_tags = [ dict(attrs={'class':[ 'share', 'reprints', 'inline-h3', - 'info-extras', + 'info-extras rounded', + 'inset', 'ppy-outer', 'ppy-caption', 'comments', @@ -61,9 +65,13 @@ class USAToday(BasicNewsRecipe): 'tags', 'bottom-tools', 'sponsoredlinks', + 'corrections' ]}), + dict(name='ul', attrs={'class':'inside-copy'}), dict(id=['pluck']), - ] + dict(id=['updated']), + dict(id=['post-date-updated']) + ] def get_masthead_url(self): From 086582cb2d167945e875e76f8909c1931915352c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Chabot?= Date: Sun, 16 Oct 2011 11:25:19 +0200 Subject: [PATCH 05/33] Add 20minutes news website recipe --- recipes/20minutes.recipe | 70 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 recipes/20minutes.recipe diff --git a/recipes/20minutes.recipe b/recipes/20minutes.recipe new file mode 100644 index 0000000000..ec9121f2b5 --- /dev/null +++ b/recipes/20minutes.recipe @@ -0,0 +1,70 @@ +__license__ = 'GPL v3' +__copyright__ = '2011 Aurélien Chabot ' +''' +20minutes.fr +''' +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + +class Minutes(BasicNewsRecipe): + + title = '20 minutes' + __author__ = 'calibre' + description = 'Actualités' + encoding = 'cp1252' + publisher = '20minutes.fr' + category = 'Actualités, France, Monde' + language = 'fr' + + use_embedded_content = False + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 15 + no_stylesheets = True + remove_empty_feeds = True + filterDuplicates = True + + extra_css = ''' + h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;} + .mna-details {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + .mna-image {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + .mna-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;} + ''' + + remove_tags = [ + dict(name='iframe'), + dict(name='div', attrs={'class':['mn-section-heading']}), + dict(name='a', attrs={'href':['#commentaires']}), + dict(name='div', attrs={'class':['mn-right']}), + dict(name='div', attrs={'class':['mna-box']}), + dict(name='div', attrs={'class':['mna-comment-call']}), + dict(name='div', attrs={'class':['mna-tools']}), + dict(name='div', attrs={'class':['mn-trilist']}) + ] + + keep_only_tags = [dict(id='mn-article')] + + remove_tags_after = dict(name='div', attrs={'class':['mna-body','mna-signature']}) + + + feeds = [ + ('France', 'http://www.20minutes.fr/rss/actu-france.xml'), + ('International', 'http://www.20minutes.fr/rss/monde.xml'), + ('Tech/Web', 'http://www.20minutes.fr/rss/hightech.xml'), + ('Sciences', 'http://www.20minutes.fr/rss/sciences.xml'), + ('Economie', 'http://www.20minutes.fr/rss/economie.xml'), + ('Politique', 'http://www.20minutes.fr/rss/politique.xml'), + (u'Médias', 'http://www.20minutes.fr/rss/media.xml'), + ('Cinema', 'http://www.20minutes.fr/rss/cinema.xml'), + ('People', 'http://www.20minutes.fr/rss/people.xml'), + ('Culture', 'http://www.20minutes.fr/rss/culture.xml'), + ('Sport', 'http://www.20minutes.fr/rss/sport.xml'), + ('Paris', 'http://www.20minutes.fr/rss/paris.xml'), + ('Lyon', 'http://www.20minutes.fr/rss/lyon.xml'), + ('Toulouse', 'http://www.20minutes.fr/rss/toulouse.xml') + ] + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup From 10680496d9b64bde6885f80f809dfe9fa3aedfdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Chabot?= Date: Sun, 16 Oct 2011 11:25:58 +0200 Subject: [PATCH 06/33] Add FrAndroid news website recipe --- recipes/frandroid.recipe | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 recipes/frandroid.recipe diff --git a/recipes/frandroid.recipe b/recipes/frandroid.recipe new file mode 100644 index 0000000000..38d164190b --- /dev/null +++ b/recipes/frandroid.recipe @@ -0,0 +1,7 @@ +class BasicUserRecipe1318572550(AutomaticNewsRecipe): + title = u'FrAndroid' + oldest_article = 2 + max_articles_per_feed = 100 + auto_cleanup = True + + feeds = [(u'FrAndroid', u'http://feeds.feedburner.com/Frandroid')] From 7f091a5ffe0af5e37dfd8a0175ae7f8cc7dec08e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Chabot?= Date: Sun, 16 Oct 2011 11:26:34 +0200 Subject: [PATCH 07/33] Add lepoint news website recipe --- recipes/lepoint.recipe | 75 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 recipes/lepoint.recipe diff --git a/recipes/lepoint.recipe b/recipes/lepoint.recipe new file mode 100644 index 0000000000..2cdc42fa5f --- /dev/null +++ b/recipes/lepoint.recipe @@ -0,0 +1,75 @@ +__license__ = 'GPL v3' +__copyright__ = '2011 Aurélien Chabot ' +''' +LePoint.fr +''' +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + +class lepoint(BasicNewsRecipe): + + title = 'Le Point' + __author__ = 'calibre' + description = 'Actualités' + encoding = 'utf-8' + publisher = 'LePoint.fr' + category = 'news, France, world' + language = 'fr' + + use_embedded_content = False + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 15 + no_stylesheets = True + remove_empty_feeds = True + filterDuplicates = True + + extra_css = ''' + h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;} + .chapo {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;} + .info_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + .media_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + .article {font-size:medium; font-family:Arial,Helvetica,sans-serif;} + ''' + + remove_tags = [ + dict(name='iframe'), + dict(name='div', attrs={'class':['entete_chroniqueur']}), + dict(name='div', attrs={'class':['col_article']}), + dict(name='div', attrs={'class':['signature_article']}), + dict(name='div', attrs={'class':['util_font util_article']}), + dict(name='div', attrs={'class':['util_article bottom']}) + ] + + keep_only_tags = [dict(name='div', attrs={'class':['page_article']})] + + remove_tags_after = dict(name='div', attrs={'class':['util_article bottom']}) + + feeds = [ + (u'À la une', 'http://www.lepoint.fr/rss.xml'), + ('International', 'http://www.lepoint.fr/monde/rss.xml'), + ('Tech/Web', 'http://www.lepoint.fr/high-tech-internet/rss.xml'), + ('Sciences', 'http://www.lepoint.fr/science/rss.xml'), + ('Economie', 'http://www.lepoint.fr/economie/rss.xml'), + (u'Socièté', 'http://www.lepoint.fr/societe/rss.xml'), + ('Politique', 'http://www.lepoint.fr/politique/rss.xml'), + (u'Médias', 'http://www.lepoint.fr/medias/rss.xml'), + ('Culture', 'http://www.lepoint.fr/culture/rss.xml'), + (u'Santé', 'http://www.lepoint.fr/sante/rss.xml'), + ('Sport', 'http://www.lepoint.fr/sport/rss.xml') + ] + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + + def get_masthead_url(self): + masthead = 'http://www.lepoint.fr/images/commun/logo.png' + br = BasicNewsRecipe.get_browser() + try: + br.open(masthead) + except: + self.log("\nCover unavailable") + masthead = None + return masthead From fcdfe4a44a2edf787e4c776bf0a6af2fac9006fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Chabot?= Date: Sun, 16 Oct 2011 11:27:02 +0200 Subject: [PATCH 08/33] Add lexpress news website recipe --- recipes/lexpress.recipe | 73 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 recipes/lexpress.recipe diff --git a/recipes/lexpress.recipe b/recipes/lexpress.recipe new file mode 100644 index 0000000000..faf6d46b6b --- /dev/null +++ b/recipes/lexpress.recipe @@ -0,0 +1,73 @@ +__license__ = 'GPL v3' +__copyright__ = '2011 Aurélien Chabot ' +''' +Lexpress.fr +''' +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + +class lepoint(BasicNewsRecipe): + + title = 'L\'express' + __author__ = 'calibre' + description = 'Actualités' + encoding = 'cp1252' + publisher = 'LExpress.fr' + category = 'Actualité, France, Monde' + language = 'fr' + + use_embedded_content = False + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 15 + no_stylesheets = True + remove_empty_feeds = True + filterDuplicates = True + + extra_css = ''' + h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;} + .current_parent, p.heure, .ouverture {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + #contenu-article {font-size:medium; font-family:Arial,Helvetica,sans-serif;} + .entete { font-weiht:bold;} + ''' + + remove_tags = [ + dict(name='iframe'), + dict(name='div', attrs={'class':['barre-outil-fb']}), + dict(name='div', attrs={'class':['barre-outils']}), + dict(id='bloc-sommaire'), + dict(id='footer-article') + ] + + keep_only_tags = [dict(name='div', attrs={'class':['bloc-article']})] + + remove_tags_after = dict(id='content-article') + + feeds = [ + (u'À la une', 'http://www.lexpress.fr/rss/alaune.xml'), + ('International', 'http://www.lexpress.fr/rss/monde.xml'), + ('Tech/Web', 'http://www.lexpress.fr/rss/high-tech.xml'), + (u'Sciences/Santé', 'http://www.lexpress.fr/rss/science-et-sante.xml'), + (u'Envronnement', 'http://www.lexpress.fr/rss/environnement.xml'), + ('Economie', 'http://www.lepoint.fr/economie/rss.xml'), + (u'Socièté', 'http://www.lexpress.fr/rss/societe.xml'), + ('Politique', 'http://www.lexpress.fr/rss/politique.xml'), + (u'Médias', 'http://www.lexpress.fr/rss/medias.xml'), + ('Culture', 'http://www.lexpress.fr/rss/culture.xml'), + ('Sport', 'http://www.lexpress.fr/rss/sport.xml') + ] + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + + def get_masthead_url(self): + masthead = 'http://static.lexpress.fr/imgstat/logo_lexpress.gif' + br = BasicNewsRecipe.get_browser() + try: + br.open(masthead) + except: + self.log("\nCover unavailable") + masthead = None + return masthead From 7bee77bbaa430077ca432c3eef4f7bcc32b14aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Chabot?= Date: Sun, 16 Oct 2011 11:27:33 +0200 Subject: [PATCH 09/33] Add zdnet.fr news website recipe --- recipes/zdnet.fr.recipe | 67 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 recipes/zdnet.fr.recipe diff --git a/recipes/zdnet.fr.recipe b/recipes/zdnet.fr.recipe new file mode 100644 index 0000000000..b5c1afe62f --- /dev/null +++ b/recipes/zdnet.fr.recipe @@ -0,0 +1,67 @@ +__license__ = 'GPL v3' +__copyright__ = '2011 Aurélien Chabot ' + +''' +Fetch zdnet.fr +''' + +from calibre.web.feeds.news import BasicNewsRecipe + + +class zdnet(BasicNewsRecipe): + + title = 'ZDNet.fr' + __author__ = 'calibre' + description = 'Actualités' + encoding = 'utf-8' + publisher = 'ZDNet.fr' + category = 'Actualité, Informatique, IT' + language = 'fr' + + use_embedded_content = False + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 15 + no_stylesheets = True + remove_empty_feeds = True + filterDuplicates = True + + extra_css = ''' + h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;} + .contentmetadata p {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;} + ''' + + remove_tags = [ + dict(name='iframe'), + dict(name='div', attrs={'class':['toolbox']}), + dict(name='div', attrs={'class':['clear clearfix']}), + dict(id='emailtoafriend'), + dict(id='storyaudio'), + dict(id='fbtwContainer'), + dict(name='h5') + ] + + remove_tags_before = dict(id='leftcol') + remove_tags_after = dict(id='content') + + feeds = [ + ('Informatique', 'http://www.zdnet.fr/feeds/rss/actualites/informatique/'), + ('Internet', 'http://www.zdnet.fr/feeds/rss/actualites/internet/'), + ('Telecom', 'http://www.zdnet.fr/feeds/rss/actualites/telecoms/') + ] + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + + def get_masthead_url(self): + masthead = 'http://www.zdnet.fr/images/base/logo.png' + br = BasicNewsRecipe.get_browser() + try: + br.open(masthead) + except: + self.log("\nCover unavailable") + masthead = None + return masthead From 4ca1db0c81c977a1fea4a2cef35714abf5cd933e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Chabot?= Date: Sun, 16 Oct 2011 11:28:07 +0200 Subject: [PATCH 10/33] Add omgubuntu news website recipe --- recipes/omgubuntu.recipe | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 recipes/omgubuntu.recipe diff --git a/recipes/omgubuntu.recipe b/recipes/omgubuntu.recipe new file mode 100644 index 0000000000..c5bf1fecef --- /dev/null +++ b/recipes/omgubuntu.recipe @@ -0,0 +1,17 @@ +class BasicUserRecipe1318619832(AutomaticNewsRecipe): + title = u'OmgUbuntu' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + + feeds = [(u'Omg Ubuntu', u'http://feeds.feedburner.com/d0od')] + + def get_masthead_url(self): + masthead = 'http://cdn.omgubuntu.co.uk/wp-content/themes/omgubuntu/images/logo.png' + br = BasicNewsRecipe.get_browser() + try: + br.open(masthead) + except: + self.log("\nCover unavailable") + masthead = None + return masthead From 15c5ad672ca004b3f95c9a8ce252056a2f71c28e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Chabot?= Date: Sun, 16 Oct 2011 11:28:39 +0200 Subject: [PATCH 11/33] Add phoronix blog/news website recipe --- recipes/phoronix.recipe | 46 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 recipes/phoronix.recipe diff --git a/recipes/phoronix.recipe b/recipes/phoronix.recipe new file mode 100644 index 0000000000..3d3397d61f --- /dev/null +++ b/recipes/phoronix.recipe @@ -0,0 +1,46 @@ +__license__ = 'GPL v3' +__copyright__ = '2011 Aurélien Chabot ' + +''' +Fetch phoronix.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + + +class cdnet(BasicNewsRecipe): + + title = 'Phoronix' + __author__ = 'calibre' + description = 'Actualités Phoronix' + encoding = 'utf-8' + publisher = 'Phoronix.com' + category = 'news, IT, linux' + language = 'en' + + use_embedded_content = False + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 25 + no_stylesheets = True + remove_empty_feeds = True + filterDuplicates = True + + extra_css = ''' + h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;} + h2 {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + .KonaBody {font-size:medium; font-family:Arial,Helvetica,sans-serif;} + ''' + + remove_tags = [] + + remove_tags_before = dict(id='phxcms_content_phx') + remove_tags_after = dict(name='div', attrs={'class':'KonaBody'}) + + feeds = [('Phoronix', 'http://feeds.feedburner.com/Phoronix')] + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + From dce91f13a234d1716f61f6f865af8e454bd52ee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Chabot?= Date: Sun, 16 Oct 2011 11:29:20 +0200 Subject: [PATCH 12/33] Add Google Mobile blog website recipe --- recipes/googlemobileblog.recipe | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 recipes/googlemobileblog.recipe diff --git a/recipes/googlemobileblog.recipe b/recipes/googlemobileblog.recipe new file mode 100644 index 0000000000..5c897304d7 --- /dev/null +++ b/recipes/googlemobileblog.recipe @@ -0,0 +1,7 @@ +class BasicUserRecipe1318572445(AutomaticNewsRecipe): + title = u'Google Mobile Blog' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + + feeds = [(u'Google Mobile Blog', u'http://googlemobile.blogspot.com/atom.xml')] From a24ba3ff592e074de6555ea97c4a8cf28127fd83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Chabot?= Date: Sun, 16 Oct 2011 11:29:56 +0200 Subject: [PATCH 13/33] Add korben blog website recipe --- recipes/korben.recipe | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 recipes/korben.recipe diff --git a/recipes/korben.recipe b/recipes/korben.recipe new file mode 100644 index 0000000000..62e50df78b --- /dev/null +++ b/recipes/korben.recipe @@ -0,0 +1,17 @@ +class BasicUserRecipe1318619728(AutomaticNewsRecipe): + title = u'Korben' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + + feeds = [(u'Korben', u'http://feeds2.feedburner.com/KorbensBlog-UpgradeYourMind')] + + def get_masthead_url(self): + masthead = 'http://korben.info/wp-content/themes/korben-steaw/hab/logo.png' + br = BasicNewsRecipe.get_browser() + try: + br.open(masthead) + except: + self.log("\nCover unavailable") + masthead = None + return masthead From 5660b08fbbfa5cb6059e044540c8dacd9a42420a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Chabot?= Date: Sun, 16 Oct 2011 13:39:28 +0200 Subject: [PATCH 14/33] Fix CNN website recipe * Add style * Add CNN logo * Fix problem with image gallery --- recipes/cnn.recipe | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/recipes/cnn.recipe b/recipes/cnn.recipe index 096c370706..6043f8b401 100644 --- a/recipes/cnn.recipe +++ b/recipes/cnn.recipe @@ -22,6 +22,14 @@ class CNN(BasicNewsRecipe): #match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html'] max_articles_per_feed = 25 + extra_css = ''' + h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;} + .cnn_story_author, .cnn_stryathrtmp {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + .cnn_strycaptiontxt, .cnnArticleGalleryPhotoContainer {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + .cnn_strycbftrtxt, .cnnEditorialNote {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + .cnn_strycntntlft {font-size:medium; font-family:Arial,Helvetica,sans-serif;} + ''' + preprocess_regexps = [ (re.compile(r'', re.DOTALL), lambda m: ''), (re.compile(r'', re.DOTALL), lambda m: ''), @@ -32,7 +40,12 @@ class CNN(BasicNewsRecipe): remove_tags = [ {'class':['cnn_strybtntools', 'cnn_strylftcntnt', 'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt', - 'cnn_strycntntrgt', 'hed_side', 'foot']}, + 'cnn_strycntntrgt', 'hed_side', 'foot', 'cnn_strylftcntnt cnn_strylftcexpbx']}, + {'class':['cnn_html_media_title_new', 'cnn_html_media_title_new cnn_html_media_title_none', + 'cnnArticleGalleryCaptionControlText', 'articleGalleryNavContainer']}, + {'id':['articleGalleryNav00JumpPrev', 'articleGalleryNav00Prev', + 'articleGalleryNav00Next', 'articleGalleryNav00JumpNext']}, + {'style':['display:none']}, dict(id=['ie_column']), ] @@ -58,3 +71,12 @@ class CNN(BasicNewsRecipe): ans = BasicNewsRecipe.get_article_url(self, article) return ans.partition('?')[0] + def get_masthead_url(self): + masthead = 'http://i.cdn.turner.com/cnn/.element/img/3.0/global/header/intl/hdr-globe-central.gif' + br = BasicNewsRecipe.get_browser() + try: + br.open(masthead) + except: + self.log("\nCover unavailable") + masthead = None + return masthead From b9c6f154c0d2c2fbe249d2e2701043214deeb5fc Mon Sep 17 00:00:00 2001 From: Sengian Date: Sun, 16 Oct 2011 13:55:54 +0200 Subject: [PATCH 15/33] RTF: Improve empty paragraphs handling & clean html file --- resources/templates/rtf.xsl | 24 ++++++++++++++++-------- src/calibre/ebooks/rtf/input.py | 10 ++++++---- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/resources/templates/rtf.xsl b/resources/templates/rtf.xsl index 7d48418776..61474701dc 100644 --- a/resources/templates/rtf.xsl +++ b/resources/templates/rtf.xsl @@ -1,7 +1,7 @@ - - - - - + + + + + + +   + + + @@ -149,7 +154,7 @@ - unamed + unnamed @@ -445,7 +450,10 @@ - # + + # + + diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index c1e649851b..5858824434 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -305,11 +305,13 @@ class RTFInput(InputFormatPlugin): html = 'index.xhtml' with open(html, 'wb') as f: res = transform.tostring(result) - res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] + # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] + #clean multiple \n + res = re.sub('\n+', '\n', res) # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines - res = re.sub('\s*', '', res) - res = re.sub('(?<=\n)\n{2}', - u'

\u00a0

\n'.encode('utf-8'), res) + # res = re.sub('\s*', '', res) + # res = re.sub('(?<=\n)\n{2}', + # u'

\u00a0

\n'.encode('utf-8'), res) f.write(res) self.write_inline_css(inline_class, border_styles) stream.seek(0) From b027cafa1fcd56fcf0f72fdee3ffaefa0f25fb4c Mon Sep 17 00:00:00 2001 From: Sengian Date: Sun, 16 Oct 2011 13:58:56 +0200 Subject: [PATCH 16/33] RTF: remove unwanted localization --- src/calibre/ebooks/rtf2xml/ParseRtf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py index e442a1c496..40945be975 100755 --- a/src/calibre/ebooks/rtf2xml/ParseRtf.py +++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py @@ -376,13 +376,13 @@ class ParseRtf: msg += 'self.__run_level is "%s"\n' % self.__run_level raise RtfInvalidCodeException, msg if self.__run_level > 1: - sys.stderr.write(_('File could be older RTF...\n')) + sys.stderr.write('File could be older RTF...\n') if found_destination: if self.__run_level > 1: - sys.stderr.write(_( + sys.stderr.write( 'File also has newer RTF.\n' 'Will do the best to convert.\n' - )) + ) add_brackets_obj = add_brackets.AddBrackets( in_file = self.__temp_file, bug_handler = RtfInvalidCodeException, From 3c91e95a27490b760742de6e0879e9d33707bc02 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 16 Oct 2011 19:13:09 +0530 Subject: [PATCH 17/33] Fix #875384 (Android Device Not detected - Freescale i.MX515 USB device) --- src/calibre/devices/android/driver.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index f6b70794d4..b0e0fa4008 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -49,6 +49,10 @@ class ANDROID(USBMS): 0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216], 0x70c6 : [0x226] }, + # Freescale + 0x15a2 : { + 0x0c01 : [0x226] + }, # Sony Ericsson 0xfce : { From 52f54d22f4423c0d2298c91a4fe68fd617b58343 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 16 Oct 2011 19:16:47 +0530 Subject: [PATCH 18/33] Fix #875257 (Add T-Mobile Move to Android driver) --- src/calibre/devices/android/driver.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index b0e0fa4008..c16e3e6f5c 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -54,6 +54,11 @@ class ANDROID(USBMS): 0x0c01 : [0x226] }, + # Alcatel + 0x05c6 : { + 0x9018 : [0x0226], + }, + # Sony Ericsson 0xfce : { 0xd12e : [0x0100], @@ -143,7 +148,8 @@ class ANDROID(USBMS): VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER', 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS', 'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA', - 'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'VIZIO'] + 'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', + 'VIZIO', 'GOOGLE', 'FREESCAL'] WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', @@ -154,7 +160,7 @@ class ANDROID(USBMS): 'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK', 'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612', 'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A', - 'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008'] + 'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', From 4736f0df8af97fad8d311880a74e68b3d2f071a9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 16 Oct 2011 19:23:48 +0530 Subject: [PATCH 19/33] Fix #873288 (Python crashes on converting (X)html containing links with "generic" signs) --- src/calibre/ebooks/conversion/plumber.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 3a18b40539..defb2b837d 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -693,6 +693,8 @@ OptionRecommendation(name='sr3_replace', def unarchive(self, path, tdir): extract(path, tdir) files = list(walk(tdir)) + files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding) + for f in files] from calibre.customize.ui import available_input_formats fmts = available_input_formats() for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x) From 34e7adf904b1cb3fc5c135185f652d84e1607916 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 17 Oct 2011 03:23:53 +0530 Subject: [PATCH 20/33] Fix El Pais --- recipes/el_pais.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/el_pais.recipe b/recipes/el_pais.recipe index 4da3384093..a481ffed47 100644 --- a/recipes/el_pais.recipe +++ b/recipes/el_pais.recipe @@ -33,7 +33,7 @@ class ElPais(BasicNewsRecipe): remove_javascript = True no_stylesheets = True - keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})] + keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','cuerpo_noticia','caja_despiece']})] extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} ' From 1f9c6f4adf0d63770f79b830f3d8717477986b9b Mon Sep 17 00:00:00 2001 From: Tom Scholl Date: Mon, 17 Oct 2011 00:11:49 +0100 Subject: [PATCH 21/33] Recipe changes. Added ekathemerini, fixed typo in kstar New recipe is for ekathimerini.com (english edition of a popular greek news site) --- recipes/ekathemerini.recipe | 57 +++++++++++++++++++++++++++++++++++++ recipes/kstar.recipe | 2 +- 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 recipes/ekathemerini.recipe diff --git a/recipes/ekathemerini.recipe b/recipes/ekathemerini.recipe new file mode 100644 index 0000000000..de8eb0939e --- /dev/null +++ b/recipes/ekathemerini.recipe @@ -0,0 +1,57 @@ +from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag + +class Ekathimerini(BasicNewsRecipe): + title = 'ekathimerini' + description = 'News from Greece, English edition' + masthead_url = 'http://wwk.kathimerini.gr/webadmin/EnglishNew/gifs/logo.gif' + max_articles_per_feed = 100 + oldest_article = 100 + publisher = 'Kathimerini' + category = 'news, GR' + language = 'en' + encoding = 'windows-1253' + conversion_options = { 'linearize_tables': True} + no_stylesheets = True + delay = 1 + keep_only_tags = [dict(name='td', attrs={'class':'news'})] + + rss_url = 'http://ws.kathimerini.gr/xml_files/latestnews.xml' + + def find_articles(self, idx, category): + for article in idx.findAll('item'): + cat = u'' + cat_elem = article.find('subcat') + if cat_elem: + cat = self.tag_to_string(cat_elem) + + if cat == category: + desc_html = self.tag_to_string(article.find('description')) + description = self.tag_to_string(BeautifulSoup(desc_html)) + + a = { + 'title': self.tag_to_string(article.find('title')), + 'url': self.tag_to_string(article.find('link')), + 'description': description, + 'date' : self.tag_to_string(article.find('pubdate')), + } + yield a + + + def parse_index(self): + idx_contents = self.browser.open(self.rss_url).read() + idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES) + + cats = list(set([self.tag_to_string(subcat) for subcat in idx.findAll('subcat')])) + cats.sort() + + feeds = [(u'News',list(self.find_articles(idx, u'')))] + + for cat in cats: + feeds.append((cat.capitalize(), list(self.find_articles(idx, cat)))) + + return feeds + + def print_version(self, url): + return url.replace('http://www.ekathimerini.com/4dcgi/', 'http://www.ekathimerini.com/4Dcgi/4dcgi/') + diff --git a/recipes/kstar.recipe b/recipes/kstar.recipe index fe44bfd679..6cf61a2484 100644 --- a/recipes/kstar.recipe +++ b/recipes/kstar.recipe @@ -1,7 +1,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1282101454(BasicNewsRecipe): - title = 'Kansascity Star' + title = 'Kansas City Star' language = 'en' __author__ = 'TonytheBookworm' description = 'www.kansascity.com feed' From 05275a30989102319d1725f985fd8b330b11ea1b Mon Sep 17 00:00:00 2001 From: Timothy Legge Date: Sun, 16 Oct 2011 22:59:55 -0300 Subject: [PATCH 22/33] Get the device path for matched books - the kobo stores kepubs in its own directory structure. This lays the groundwork for adding annotations support to the Kobo driver --- src/calibre/devices/kobo/driver.py | 5 +++++ src/calibre/devices/usbms/device.py | 3 +++ src/calibre/gui2/actions/annotate.py | 10 +++++++++- src/calibre/gui2/library/models.py | 9 ++++++--- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 0e11302ec3..ed22ebd9d5 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -77,6 +77,11 @@ class KOBO(USBMS): self.book_class = Book self.dbversion = 7 + def create_annotations_path(self, mdata, device_path=None): + if device_path: + return device_path + return USBMS.create_annotations_path(self, mdata) + def books(self, oncard=None, end_session=True): from calibre.ebooks.metadata.meta import path_to_ext diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index 85ab5905b9..f1b8a9580a 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -1147,3 +1147,6 @@ class Device(DeviceConfig, DevicePlugin): os.makedirs(filedir) return filepath + + def create_annotations_path(self, mdata, device_path=None): + return self.create_upload_path(os.path.abspath('/'), mdata, 'x.bookmark', create_dirs=False) diff --git a/src/calibre/gui2/actions/annotate.py b/src/calibre/gui2/actions/annotate.py index 1db532bfc4..5ae0860d3b 100644 --- a/src/calibre/gui2/actions/annotate.py +++ b/src/calibre/gui2/actions/annotate.py @@ -41,13 +41,21 @@ class FetchAnnotationsAction(InterfaceAction): fmts.append(format.lower()) return fmts + def get_device_path_from_id(id_): + paths = [] + for x in ('memory', 'card_a', 'card_b'): + x = getattr(self.gui, x+'_view').model() + paths += x.paths_for_db_ids(set([id_]), as_map=True)[id_] + return paths[0].path if paths else None + def generate_annotation_paths(ids, db, device): # Generate path templates # Individual storage mount points scanned/resolved in driver.get_annotations() path_map = {} for id in ids: + path = get_device_path_from_id(id) mi = db.get_metadata(id, index_is_id=True) - a_path = device.create_upload_path(os.path.abspath('/'), mi, 'x.bookmark', create_dirs=False) + a_path = device.create_annotations_path(mi, path) path_map[id] = dict(path=a_path, fmts=get_formats(id)) return path_map diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index a0870b1e8d..9c456ac771 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -1239,11 +1239,14 @@ class DeviceBooksModel(BooksModel): # {{{ def paths(self, rows): return [self.db[self.map[r.row()]].path for r in rows ] - def paths_for_db_ids(self, db_ids): - res = [] + def paths_for_db_ids(self, db_ids, as_map=False): + res = defaultdict(list) if as_map else [] for r,b in enumerate(self.db): if b.application_id in db_ids: - res.append((r,b)) + if as_map: + res[b.application_id].append(b) + else: + res.append((r,b)) return res def get_collections_with_ids(self): From 905d7af56f4c19bb242c3a5a5fbc4b29c49b08af Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 17 Oct 2011 10:21:04 +0530 Subject: [PATCH 23/33] Fix #815256 (Korea Times subscription return void articles) --- recipes/korea_herald.recipe | 71 ++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/recipes/korea_herald.recipe b/recipes/korea_herald.recipe index 829906338c..9851767512 100644 --- a/recipes/korea_herald.recipe +++ b/recipes/korea_herald.recipe @@ -1,36 +1,35 @@ -__license__ = 'GPL v3' -__copyright__ = '2011, Seongkyoun Yoo ' -''' -Profile to download KoreaHerald -''' -from calibre.web.feeds.news import BasicNewsRecipe - -class KoreaHerald(BasicNewsRecipe): - title = u'KoreaHerald' - language = 'en' - description = u'Korea Herald News articles' - __author__ = 'Seongkyoun Yoo' - oldest_article = 10 - recursions = 3 - max_articles_per_feed = 10 - no_stylesheets = True - keep_only_tags = [ - dict(id=['contentLeft', '_article']) - ] - - remove_tags = [ - dict(name='iframe'), - dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}), - dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}), - ] - - feeds = [ - ('All News','http://www.koreaherald.com/rss/020000000000.xml'), - ('National','http://www.koreaherald.com/rss/020100000000.xml'), - ('Business','http://www.koreaherald.com/rss/020200000000.xml'), - ('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'), - ('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'), - ('Sports','http://www.koreaherald.com/rss/020500000000.xml'), - ('Opinion','http://www.koreaherald.com/rss/020600000000.xml'), - ('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'), - ] +__license__ = 'GPL v3' +__copyright__ = '2011, Seongkyoun Yoo ' +''' +Profile to download KoreaHerald +''' +from calibre.web.feeds.news import BasicNewsRecipe + +class KoreaHerald(BasicNewsRecipe): + title = u'KoreaHerald' + language = 'en' + description = u'Korea Herald News articles' + __author__ = 'Seongkyoun Yoo' + oldest_article = 15 + recursions = 3 + max_articles_per_feed = 15 + no_stylesheets = True + keep_only_tags = [ + dict(id=['contentLeft', '_article']) + ] + + remove_tags = [ + dict(name='iframe'), + dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}), + dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}), + ] + + feeds = [ + ('National','http://www.koreaherald.com/rss/020100000000.xml'), + ('Business','http://www.koreaherald.com/rss/020200000000.xml'), + ('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'), + ('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'), + ('Sports','http://www.koreaherald.com/rss/020500000000.xml'), + ('Opinion','http://www.koreaherald.com/rss/020600000000.xml'), + ('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'), + ] From 6d7ba7dfad64dc41af10615d0ee4e7b14ac25f75 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 17 Oct 2011 10:30:23 +0530 Subject: [PATCH 24/33] Hankyoreh by Seongkyoun Yoo. Fixes #876125 (add news recipe for korean hankyoreh) --- recipes/hankyoreh.recipe | 50 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 recipes/hankyoreh.recipe diff --git a/recipes/hankyoreh.recipe b/recipes/hankyoreh.recipe new file mode 100644 index 0000000000..3afd3dd5dd --- /dev/null +++ b/recipes/hankyoreh.recipe @@ -0,0 +1,50 @@ +__license__ = 'GPL v3' +__copyright__ = '2011, Seongkyoun Yoo ' +''' +Profile to download The Hankyoreh +''' +import re +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup + + +class Hankyoreh(BasicNewsRecipe): + title = u'Hankyoreh' + language = 'ko' + description = u'The Hankyoreh News articles' + __author__ = 'Seongkyoun Yoo' + oldest_article = 5 + recursions = 1 + max_articles_per_feed = 5 + no_stylesheets = True + keep_only_tags = [ + dict(name='tr', attrs={'height':['60px']}), + dict(id=['fontSzArea']) + ] + remove_tags = [ + dict(target='_blank'), + dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}), + dict(name='iframe', attrs={'width':['590']}), + ] + remove_tags_after = [ + dict(target='_top') + ] + feeds = [ + ('All News','http://www.hani.co.kr/rss/'), + ('Politics','http://www.hani.co.kr/rss/politics/'), + ('Economy','http://www.hani.co.kr/rss/economy/'), + ('Society','http://www.hani.co.kr/rss/society/'), + ('International','http://www.hani.co.kr/rss/international/'), + ('Culture','http://www.hani.co.kr/rss/culture/'), + ('Sports','http://www.hani.co.kr/rss/sports/'), + ('Science','http://www.hani.co.kr/rss/science/'), + ('Opinion','http://www.hani.co.kr/rss/opinion/'), + ('Cartoon','http://www.hani.co.kr/rss/cartoon/'), + ('English Edition','http://www.hani.co.kr/rss/english_edition/'), + ('Specialsection','http://www.hani.co.kr/rss/specialsection/'), + ('Hanionly','http://www.hani.co.kr/rss/hanionly/'), + ('Hkronly','http://www.hani.co.kr/rss/hkronly/'), + ('Multihani','http://www.hani.co.kr/rss/multihani/'), + ('Lead','http://www.hani.co.kr/rss/lead/'), + ('Newsrank','http://www.hani.co.kr/rss/newsrank/'), + ] \ No newline at end of file From 74840ce44adecd8380a09c04816ec26ee28abe02 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 17 Oct 2011 10:35:04 +0530 Subject: [PATCH 25/33] Hankyoreh21 by Seongkyoun Yoo. Fixes #876127 (add news recipe for korean hankyoreh21) --- recipes/hankyoreh21.recipe | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 recipes/hankyoreh21.recipe diff --git a/recipes/hankyoreh21.recipe b/recipes/hankyoreh21.recipe new file mode 100644 index 0000000000..85ded3b8e3 --- /dev/null +++ b/recipes/hankyoreh21.recipe @@ -0,0 +1,26 @@ +__license__ = 'GPL v3' +__copyright__ = '2011, Seongkyoun Yoo ' +''' +Profile to download The Hankyoreh +''' +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class Hankyoreh21(BasicNewsRecipe): + title = u'Hankyoreh21' + language = 'ko' + description = u'The Hankyoreh21 Magazine articles' + __author__ = 'Seongkyoun Yoo' + oldest_article = 20 + recursions = 1 + max_articles_per_feed = 120 + no_stylesheets = True + remove_javascript = True + keep_only_tags = [ + dict(name='font', attrs={'class':'t18bk'}), + dict(id=['fontSzArea']) + ] + + feeds = [ + ('Hani21','http://h21.hani.co.kr/rss/ '), + ] \ No newline at end of file From 10b2cd9827a135481a0a3085fd182af28aeea137 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 17 Oct 2011 13:10:32 +0530 Subject: [PATCH 26/33] Kyugyhang by Seongkyoun Yoo. Fixes #876129 (add news recipe for korean kyunghyang) --- recipes/kyungyhang | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 recipes/kyungyhang diff --git a/recipes/kyungyhang b/recipes/kyungyhang new file mode 100644 index 0000000000..ac658b1cab --- /dev/null +++ b/recipes/kyungyhang @@ -0,0 +1,37 @@ +__license__ = 'GPL v3' +__copyright__ = '2011, Seongkyoun Yoo ' +''' +Profile to download The Kyungyhang +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Kyungyhang(BasicNewsRecipe): + title = u'Kyungyhang' + language = 'ko' + description = u'The Kyungyhang Shinmun articles' + __author__ = 'Seongkyoun Yoo' + oldest_article = 20 + recursions = 2 + max_articles_per_feed = 20 + no_stylesheets = True + remove_javascript = True + + keep_only_tags = [ + dict(name='div', attrs ={'class':['article_title_wrap']}), + dict(name='div', attrs ={'class':['article_txt']}) + ] + + remove_tags_after = dict(id={'sub_bottom'}) + + remove_tags = [ + dict(name='iframe'), + dict(id={'TdHot'}), + dict(name='div', attrs={'class':['btn_list','bline','linebottom','bestArticle']}), + dict(name='dl', attrs={'class':['CL']}), + dict(name='ul', attrs={'class':['tab']}), + ] + + feeds = [ + ('All News','http://www.khan.co.kr/rss/rssdata/total_news.xml'), + ] \ No newline at end of file From f8ffc93889b9a12f558efadb2d9f55e7adcf0f11 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Oct 2011 07:42:04 +0530 Subject: [PATCH 27/33] ... --- src/calibre/manual/faq.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index cde07e66f9..6555092fd0 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -242,6 +242,10 @@ Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you get timeout errors while browsing the calibre catalog in Stanza, try increasing the connection timeout value in the stanza settings. Go to Info->Settings and increase the value of Download Timeout. +.. note:: + As of iOS version 5 Stanza no longer works on Apple devices. Alternatives to Stanza are discussed `here `_. + + Using iBooks ************** @@ -251,7 +255,7 @@ Start the Safari browser and type in the IP address and port of the computer run Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address you computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address. -You wills ee a list of books in Safari, just click on the epub link for whichever book you want to read, Safari will then prompt you to open it with iBooks. +You will see a list of books in Safari, just click on the epub link for whichever book you want to read, Safari will then prompt you to open it with iBooks. With the USB cable + iTunes From c559b7396291617aa081b9731517f7bc3f3280c3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Oct 2011 14:19:21 +0530 Subject: [PATCH 28/33] Fix regression that broke reading metadata from CHM files --- src/calibre/ebooks/chm/input.py | 2 +- src/calibre/ebooks/chm/reader.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/chm/input.py b/src/calibre/ebooks/chm/input.py index b5074e8a72..9aa8272ee9 100644 --- a/src/calibre/ebooks/chm/input.py +++ b/src/calibre/ebooks/chm/input.py @@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin): def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False): from calibre.ebooks.chm.reader import CHMReader log.debug('Opening CHM file') - rdr = CHMReader(chm_path, log, self.opts) + rdr = CHMReader(chm_path, log, input_encoding=self.opts.input_encoding) log.debug('Extracting CHM to %s' % output_dir) rdr.extract_content(output_dir, debug_dump=debug_dump) self._chm_reader = rdr diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py index 5f23ad0241..05ec388a9b 100644 --- a/src/calibre/ebooks/chm/reader.py +++ b/src/calibre/ebooks/chm/reader.py @@ -40,14 +40,14 @@ class CHMError(Exception): pass class CHMReader(CHMFile): - def __init__(self, input, log, opts): + def __init__(self, input, log, input_encoding=None): CHMFile.__init__(self) if isinstance(input, unicode): input = input.encode(filesystem_encoding) if not self.LoadCHM(input): raise CHMError("Unable to open CHM file '%s'"%(input,)) self.log = log - self.opts = opts + self.input_encoding = input_encoding self._sourcechm = input self._contents = None self._playorder = 0 @@ -156,8 +156,8 @@ class CHMReader(CHMFile): break def _reformat(self, data, htmlpath): - if self.opts.input_encoding: - data = data.decode(self.opts.input_encoding) + if self.input_encoding: + data = data.decode(self.input_encoding) try: data = xml_to_unicode(data, strip_encoding_pats=True)[0] soup = BeautifulSoup(data) From 186b781835996a0918b0638b73735f74c9f72473 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Oct 2011 19:17:11 +0530 Subject: [PATCH 29/33] Fix #874643 (Updated recipe for La Repubblica) --- recipes/la_republica.recipe | 76 ++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 23 deletions(-) diff --git a/recipes/la_republica.recipe b/recipes/la_republica.recipe index e55211c223..c1b0f3a463 100644 --- a/recipes/la_republica.recipe +++ b/recipes/la_republica.recipe @@ -1,32 +1,37 @@ __license__ = 'GPL v3' __author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini' __copyright__ = '2009-2011, Darko Miletic , Lorenzo Vigentini ' -description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version' +description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version' ''' http://www.repubblica.it/ ''' import re +from calibre.ptempfile import PersistentTemporaryFile from calibre.web.feeds.news import BasicNewsRecipe class LaRepubblica(BasicNewsRecipe): - title = 'La Repubblica' - __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic' - description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.' - masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png' - publisher = 'Gruppo editoriale L\'Espresso' - category = 'News, politics, culture, economy, general interest' - language = 'it' - timefmt = '[%a, %d %b, %Y]' - oldest_article = 5 - encoding = 'utf8' - use_embedded_content = False - #recursion = 10 - no_stylesheets = True - extra_css = """ - img{display: block} - """ + title = 'La Repubblica' + __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic' + description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.' + masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png' + publisher = 'Gruppo editoriale L\'Espresso' + category = 'News, politics, culture, economy, general interest' + language = 'it' + timefmt = '[%a, %d %b, %Y]' + oldest_article = 5 + encoding = 'utf8' + use_embedded_content = False + no_stylesheets = True + publication_type = 'newspaper' + articles_are_obfuscated = True + temp_files = [] + extra_css = """ + img{display: block} + """ + + remove_attributes = ['width','height','lang','xmlns:og','xmlns:fb'] preprocess_regexps = [ (re.compile(r'.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), @@ -35,11 +40,28 @@ class LaRepubblica(BasicNewsRecipe): ] def get_article_url(self, article): - link = article.get('id', article.get('guid', None)) - if link is None: - return article - return link - + link = BasicNewsRecipe.get_article_url(self, article) + if link and not '.repubblica.it/' in link: + link2 = article.get('id', article.get('guid', None)) + if link2: + link = link2 + return link.rpartition('?')[0] + + def get_obfuscated_article(self, url): + count = 0 + while (count < 10): + try: + response = self.browser.open(url) + html = response.read() + count = 10 + except: + print "Retrying download..." + count += 1 + self.temp_files.append(PersistentTemporaryFile('_fa.html')) + self.temp_files[-1].write(html) + self.temp_files[-1].close() + return self.temp_files[-1].name + keep_only_tags = [ dict(attrs={'class':'articolo'}), dict(attrs={'class':'body-text'}), @@ -49,7 +71,7 @@ class LaRepubblica(BasicNewsRecipe): remove_tags = [ - dict(name=['object','link','meta']), + dict(name=['object','link','meta','iframe','embed']), dict(name='span',attrs={'class':'linkindice'}), dict(name='div', attrs={'class':'bottom-mobile'}), dict(name='div', attrs={'id':['rssdiv','blocco']}), @@ -80,3 +102,11 @@ class LaRepubblica(BasicNewsRecipe): (u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml') ] + def preprocess_html(self, soup): + for item in soup.findAll(['hgroup','deresponsabilizzazione','per']): + item.name = 'div' + item.attrs = [] + for item in soup.findAll(style=True): + del item['style'] + return soup + From f31f109c23e2b7194549a0df90103a580b4c56fb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 19 Oct 2011 05:49:28 +0530 Subject: [PATCH 30/33] ... --- recipes/los_tiempos_bo.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/los_tiempos_bo.recipe b/recipes/los_tiempos_bo.recipe index 00ddd9d7c1..1d61396a4f 100644 --- a/recipes/los_tiempos_bo.recipe +++ b/recipes/los_tiempos_bo.recipe @@ -22,7 +22,7 @@ class LosTiempos_Bol(BasicNewsRecipe): publication_type = 'newspaper' delay = 1 remove_empty_feeds = True - cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_md_1.jpg') + cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_gd_1.jpg') masthead_url = 'http://www.lostiempos.com/img_stat/logo_tiempos_sin_beta.jpg' extra_css = """ body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} From 68a29c213d997561f7c90a49c83a23e2b993d637 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 19 Oct 2011 06:06:36 +0530 Subject: [PATCH 31/33] Update Ming Pao --- recipes/ming_pao.recipe | 112 ++++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 45 deletions(-) diff --git a/recipes/ming_pao.recipe b/recipes/ming_pao.recipe index 856d7166ff..da7272ca2e 100644 --- a/recipes/ming_pao.recipe +++ b/recipes/ming_pao.recipe @@ -4,26 +4,27 @@ __copyright__ = '2010-2011, Eddie Lau' # Region - Hong Kong, Vancouver, Toronto __Region__ = 'Hong Kong' # Users of Kindle 3 with limited system-level CJK support -# please replace the following "True" with "False". +# please replace the following "True" with "False". (Default: True) __MakePeriodical__ = True -# Turn below to True if your device supports display of CJK titles +# Turn below to True if your device supports display of CJK titles (Default: False) __UseChineseTitle__ = False -# Set it to False if you want to skip images +# Set it to False if you want to skip images (Default: True) __KeepImages__ = True -# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source +# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True) __UseLife__ = True -# (HK only) It is to disable the column section which is now a premium content -__InclCols__ = False -# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats -__ParsePFF__ = False -# (HK only) Turn below to True if you wish hi-res images +# (HK only) It is to disable premium content (Default: False) +__InclPremium__ = False +# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True) +__ParsePFF__ = True +# (HK only) Turn below to True if you wish hi-res images (Default: False) __HiResImg__ = False ''' Change Log: +2011/10/17: disable fetching of premium content, also improved txt source parsing 2011/10/04: option to get hi-res photos for the articles -2011/09/21: fetching "column" section is made optional. +2011/09/21: fetching "column" section is made optional. 2011/09/18: parse "column" section stuff from source text file directly. 2011/09/07: disable "column" section as it is no longer offered free. 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source @@ -72,7 +73,7 @@ class MPRecipe(BasicNewsRecipe): dict(attrs={'class':['content']}), # for content from txt dict(attrs={'class':['photo']}), dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com - dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com + dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com dict(attrs={'class':['images']}) # for images from txt ] if __KeepImages__: @@ -208,18 +209,21 @@ class MPRecipe(BasicNewsRecipe): (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'), (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal') ]: - articles = self.parse_section2(url, keystr) + if __InclPremium__ == True: + articles = self.parse_section2_txt(url, keystr) + else: + articles = self.parse_section2(url, keystr) if articles: feeds.append((title, articles)) - if __InclCols__ == True: + if __InclPremium__ == True: # parse column section articles directly from .txt files for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl') ]: articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: articles = self.parse_section(url) @@ -253,10 +257,10 @@ class MPRecipe(BasicNewsRecipe): # feeds.append((u'\u7d93\u6fdf Finance', fin_articles)) for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]: - articles = self.parse_section2(url, keystr) + articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), # (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]: # articles = self.parse_section(url) @@ -270,18 +274,18 @@ class MPRecipe(BasicNewsRecipe): for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal') ]: - articles = self.parse_section2(url, keystr) + articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - - if __InclCols__ == True: + + if __InclPremium__ == True: # parse column section articles directly from .txt files for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl') ]: articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: articles = self.parse_section(url) @@ -333,7 +337,7 @@ class MPRecipe(BasicNewsRecipe): url = 'http://news.mingpao.com/' + dateStr + '/' +url # replace the url to the print-friendly version if __ParsePFF__ == True: - if url.rfind('Redirect') <> -1: + if url.rfind('Redirect') <> -1 and __InclPremium__ == True: url = re.sub(dateStr + '.*' + dateStr, dateStr, url) url = re.sub('%2F.*%2F', '/', url) title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '') @@ -349,6 +353,8 @@ class MPRecipe(BasicNewsRecipe): # parse from life.mingpao.com def parse_section2(self, url, keystr): + br = mechanize.Browser() + br.set_handle_redirect(False) self.get_fetchdate() soup = self.index_to_soup(url) a = soup.findAll('a', href=True) @@ -359,9 +365,13 @@ class MPRecipe(BasicNewsRecipe): title = self.tag_to_string(i) url = 'http://life.mingpao.com/cfm/' + i.get('href', False) if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1): - url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article - current_articles.append({'title': title, 'url': url, 'description': ''}) - included_urls.append(url) + try: + br.open_novisit(url) + url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article + current_articles.append({'title': title, 'url': url, 'description': ''}) + included_urls.append(url) + except: + print 'skipping a premium article' current_articles.reverse() return current_articles @@ -382,7 +392,7 @@ class MPRecipe(BasicNewsRecipe): included_urls.append(url) current_articles.reverse() return current_articles - + # parse from www.mingpaovan.com def parse_section3(self, url, baseUrl): self.get_fetchdate() @@ -470,23 +480,23 @@ class MPRecipe(BasicNewsRecipe): #raw_html = raw_html.replace(u'

\u3010', u'\u3010') if __HiResImg__ == True: # TODO: add a _ in front of an image url - if url.rfind('news.mingpao.com') > -1: + if url.rfind('news.mingpao.com') > -1: imglist = re.findall('src="?.*?jpg"', raw_html) br = mechanize.Browser() br.set_handle_redirect(False) for img in imglist: gifimg = img.replace('jpg"', 'gif"') - try: + try: br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1]) raw_html = raw_html.replace(img, gifimg) - except: + except: # find the location of the first _ pos = img.find('_') if pos > -1: # if found, insert _ after the first _ newimg = img[0:pos] + '_' + img[pos:] raw_html = raw_html.replace(img, newimg) - else: + else: # if not found, insert _ after " raw_html = raw_html.replace(img[1:], '"_' + img[1:]) elif url.rfind('life.mingpao.com') > -1: @@ -510,7 +520,7 @@ class MPRecipe(BasicNewsRecipe): pos = img.rfind('/') newimg = img[0:pos+1] + '_' + img[pos+1:] #print 'newimg: ', newimg - raw_html = raw_html.replace(img, newimg) + raw_html = raw_html.replace(img, newimg) if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1: return raw_html else: @@ -549,10 +559,11 @@ class MPRecipe(BasicNewsRecipe): photo = photo.replace('class="photo"', '') new_raw_html = new_raw_html + '

' + photo + '
' return new_raw_html + '' - else: + else: # .txt based file splitter = re.compile(r'\n') # Match non-digits new_raw_html = 'Untitled
' + next_is_mov_link = False next_is_img_txt = False title_started = False met_article_start_char = False @@ -561,24 +572,35 @@ class MPRecipe(BasicNewsRecipe): met_article_start_char = True new_raw_html = new_raw_html + '

' + item + '

\n' else: - if next_is_img_txt == False: - if item.startswith('='): + if next_is_img_txt == False and next_is_mov_link == False: + item = item.strip() + if item.startswith("=@"): + next_is_mov_link = True + elif item.startswith("=?"): + next_is_img_txt = True + new_raw_html += '

\n' + elif item.startswith('='): next_is_img_txt = True new_raw_html += '

\n' else: - if met_article_start_char == False: - if title_started == False: - new_raw_html = new_raw_html + '

' + item + '\n' - title_started = True + if item <> '': + if next_is_img_txt == False and met_article_start_char == False: + if title_started == False: + #print 'Title started at ', item + new_raw_html = new_raw_html + '
' + item + '\n' + title_started = True + else: + new_raw_html = new_raw_html + item + '\n' else: - new_raw_html = new_raw_html + item + '\n' - else: - new_raw_html = new_raw_html + item + '

\n' + new_raw_html = new_raw_html + item + '

\n' else: - next_is_img_txt = False - new_raw_html = new_raw_html + item + '\n' + if next_is_mov_link == True: + next_is_mov_link = False + else: + next_is_img_txt = False + new_raw_html = new_raw_html + item + '\n' return new_raw_html + '

' - + def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style'] @@ -587,7 +609,7 @@ class MPRecipe(BasicNewsRecipe): for item in soup.findAll(stype=True): del item['absmiddle'] return soup - + def create_opf(self, feeds, dir=None): if dir is None: dir = self.output_dir @@ -678,7 +700,7 @@ class MPRecipe(BasicNewsRecipe): if po is None: self.play_order_counter += 1 po = self.play_order_counter - parent.add_item('%sindex.html'%adir, None, a.title if a.title else ('Untitled Article'), + parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'), play_order=po, author=auth, description=desc) last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep)) for sp in a.sub_pages: From 6f4a86b36e0807583f4e7b8033985970625e8477 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 19 Oct 2011 06:16:57 +0530 Subject: [PATCH 32/33] ... --- src/calibre/devices/kobo/driver.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index ed22ebd9d5..4c2cb2f566 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -5,7 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Timothy Legge and Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, shutil +import os import sqlite3 as sqlite from contextlib import closing from calibre.devices.usbms.books import BookList @@ -756,9 +756,12 @@ class KOBO(USBMS): blists = {} for i in paths: - if booklists[i] is not None: - #debug_print('Booklist: ', i) - blists[i] = booklists[i] + try: + if booklists[i] is not None: + #debug_print('Booklist: ', i) + blists[i] = booklists[i] + except IndexError: + pass opts = self.settings() if opts.extra_customization: collections = [x.lower().strip() for x in @@ -872,14 +875,14 @@ class KOBO(USBMS): debug_print("ImageID could not be retreived from the database") def prepare_addable_books(self, paths): - ''' + ''' The Kobo supports an encrypted epub refered to as a kepub Unfortunately Kobo decided to put the files on the device - with no file extension. I just hope that decision causes + with no file extension. I just hope that decision causes them as much grief as it does me :-) This has to make a temporary copy of the book files with a - epub extension to allow Calibre's normal processing to + epub extension to allow Calibre's normal processing to deal with the file appropriately ''' for idx, path in enumerate(paths): From d1ef8de37b73ce33fe6a170f7b29954a091da1eb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 19 Oct 2011 10:06:51 +0530 Subject: [PATCH 33/33] Refactor annotations code --- src/calibre/devices/interface.py | 2 +- src/calibre/devices/kindle/driver.py | 117 ++++++++++++++ src/calibre/devices/usbms/device.py | 6 + src/calibre/gui2/actions/annotate.py | 229 ++++++++------------------- 4 files changed, 188 insertions(+), 166 deletions(-) diff --git a/src/calibre/devices/interface.py b/src/calibre/devices/interface.py index bed5a0b77c..d9b52ad9a4 100644 --- a/src/calibre/devices/interface.py +++ b/src/calibre/devices/interface.py @@ -62,7 +62,7 @@ class DevicePlugin(Plugin): #: Icon for this device icon = I('reader.png') - # Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations() + # Encapsulates an annotation fetched from the device UserAnnotation = namedtuple('Annotation','type, value') #: GUI displays this as a message if not None. Useful if opening can take a diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index 3ce69dba1e..43718e7205 100644 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -13,6 +13,8 @@ import datetime, os, re, sys, json, hashlib from calibre.devices.kindle.apnx import APNXBuilder from calibre.devices.kindle.bookmark import Bookmark from calibre.devices.usbms.driver import USBMS +from calibre.ebooks.metadata import MetaInformation +from calibre import strftime ''' Notes on collections: @@ -164,6 +166,121 @@ class KINDLE(USBMS): # This returns as job.result in gui2.ui.annotations_fetched(self,job) return bookmarked_books + def generate_annotation_html(self, bookmark): + from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString + # Returns
...
+ last_read_location = bookmark.last_read_location + timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp) + percent_read = bookmark.percent_read + + ka_soup = BeautifulSoup() + dtc = 0 + divTag = Tag(ka_soup,'div') + divTag['class'] = 'user_annotations' + + # Add the last-read location + spanTag = Tag(ka_soup, 'span') + spanTag['style'] = 'font-weight:bold' + if bookmark.book_format == 'pdf': + spanTag.insert(0,NavigableString( + _("%(time)s
Last Page Read: %(loc)d (%(pr)d%%)") % \ + dict(time=strftime(u'%x', timestamp.timetuple()), + loc=last_read_location, + pr=percent_read))) + else: + spanTag.insert(0,NavigableString( + _("%(time)s
Last Page Read: Location %(loc)d (%(pr)d%%)") % \ + dict(time=strftime(u'%x', timestamp.timetuple()), + loc=last_read_location, + pr=percent_read))) + + divTag.insert(dtc, spanTag) + dtc += 1 + divTag.insert(dtc, Tag(ka_soup,'br')) + dtc += 1 + + if bookmark.user_notes: + user_notes = bookmark.user_notes + annotations = [] + + # Add the annotations sorted by location + # Italicize highlighted text + for location in sorted(user_notes): + if user_notes[location]['text']: + annotations.append( + _('Location %(dl)d • %(typ)s
%(text)s
') % \ + dict(dl=user_notes[location]['displayed_location'], + typ=user_notes[location]['type'], + text=(user_notes[location]['text'] if \ + user_notes[location]['type'] == 'Note' else \ + '%s' % user_notes[location]['text']))) + else: + if bookmark.book_format == 'pdf': + annotations.append( + _('Page %(dl)d • %(typ)s
') % \ + dict(dl=user_notes[location]['displayed_location'], + typ=user_notes[location]['type'])) + else: + annotations.append( + _('Location %(dl)d • %(typ)s
') % \ + dict(dl=user_notes[location]['displayed_location'], + typ=user_notes[location]['type'])) + + for annotation in annotations: + divTag.insert(dtc, annotation) + dtc += 1 + + ka_soup.insert(0,divTag) + return ka_soup + + + def add_annotation_to_library(self, db, db_id, annotation): + from calibre.ebooks.BeautifulSoup import Tag + bm = annotation + ignore_tags = set(['Catalog', 'Clippings']) + + if bm.type == 'kindle_bookmark': + mi = db.get_metadata(db_id, index_is_id=True) + user_notes_soup = self.generate_annotation_html(bm.value) + if mi.comments: + a_offset = mi.comments.find('
') + ad_offset = mi.comments.find('
') + + if a_offset >= 0: + mi.comments = mi.comments[:a_offset] + if ad_offset >= 0: + mi.comments = mi.comments[:ad_offset] + if set(mi.tags).intersection(ignore_tags): + return + if mi.comments: + hrTag = Tag(user_notes_soup,'hr') + hrTag['class'] = 'annotations_divider' + user_notes_soup.insert(0, hrTag) + + mi.comments += unicode(user_notes_soup.prettify()) + else: + mi.comments = unicode(user_notes_soup.prettify()) + # Update library comments + db.set_comment(db_id, mi.comments) + + # Add bookmark file to db_id + db.add_format_with_hooks(db_id, bm.value.bookmark_extension, + bm.value.path, index_is_id=True) + elif bm.type == 'kindle_clippings': + # Find 'My Clippings' author=Kindle in database, or add + last_update = 'Last modified %s' % strftime(u'%x %X',bm.value['timestamp'].timetuple()) + mc_id = list(db.data.search_getting_ids('title:"My Clippings"', '')) + if mc_id: + db.add_format_with_hooks(mc_id[0], 'TXT', bm.value['path'], + index_is_id=True) + mi = db.get_metadata(mc_id[0], index_is_id=True) + mi.comments = last_update + db.set_metadata(mc_id[0], mi) + else: + mi = MetaInformation('My Clippings', authors = ['Kindle']) + mi.tags = ['Clippings'] + mi.comments = last_update + db.add_books([bm.value['path']], ['txt'], [mi]) class KINDLE2(KINDLE): diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index f1b8a9580a..e6120f337f 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -1068,6 +1068,12 @@ class Device(DeviceConfig, DevicePlugin): ''' return {} + def add_annotation_to_library(self, db, db_id, annotation): + ''' + Add an annotation to the calibre library + ''' + pass + def create_upload_path(self, path, mdata, fname, create_dirs=True): path = os.path.abspath(path) maxlen = self.MAX_PATH_LEN diff --git a/src/calibre/gui2/actions/annotate.py b/src/calibre/gui2/actions/annotate.py index 4d8f462545..8b78dbc321 100644 --- a/src/calibre/gui2/actions/annotate.py +++ b/src/calibre/gui2/actions/annotate.py @@ -5,14 +5,57 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import datetime from PyQt4.Qt import pyqtSignal, QModelIndex, QThread, Qt from calibre.gui2 import error_dialog -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString -from calibre import strftime from calibre.gui2.actions import InterfaceAction +from calibre.devices.usbms.device import Device +from calibre.gui2.dialogs.progress import ProgressDialog + +class Updater(QThread): # {{{ + + update_progress = pyqtSignal(int) + update_done = pyqtSignal() + + def __init__(self, parent, db, device, annotation_map, done_callback): + QThread.__init__(self, parent) + self.errors = {} + self.db = db + self.keep_going = True + self.pd = ProgressDialog(_('Merging user annotations into database'), '', + 0, len(annotation_map), parent=parent) + + self.device = device + self.annotation_map = annotation_map + self.done_callback = done_callback + self.pd.canceled_signal.connect(self.canceled) + self.pd.setModal(True) + self.pd.show() + self.update_progress.connect(self.pd.set_value, + type=Qt.QueuedConnection) + self.update_done.connect(self.pd.hide, type=Qt.QueuedConnection) + + def canceled(self): + self.keep_going = False + self.pd.hide() + + def run(self): + for i, id_ in enumerate(self.annotation_map): + if not self.keep_going: + break + bm = Device.UserAnnotation(self.annotation_map[id_][0], + self.annotation_map[id_][1]) + try: + self.device.add_annotation_to_library(self.db, id_, bm) + except: + import traceback + self.errors[id_] = traceback.format_exc() + self.update_progress.emit(i) + self.update_done.emit() + self.done_callback(self.annotation_map.keys(), self.errors) + +# }}} class FetchAnnotationsAction(InterfaceAction): @@ -86,166 +129,6 @@ class FetchAnnotationsAction(InterfaceAction): path_map) def annotations_fetched(self, job): - from calibre.devices.usbms.device import Device - from calibre.ebooks.metadata import MetaInformation - from calibre.gui2.dialogs.progress import ProgressDialog - from calibre.library.cli import do_add_format - - class Updater(QThread): # {{{ - - update_progress = pyqtSignal(int) - update_done = pyqtSignal() - FINISHED_READING_PCT_THRESHOLD = 96 - - def __init__(self, parent, db, annotation_map, done_callback): - QThread.__init__(self, parent) - self.db = db - self.pd = ProgressDialog(_('Merging user annotations into database'), '', - 0, len(job.result), parent=parent) - - self.am = annotation_map - self.done_callback = done_callback - self.pd.canceled_signal.connect(self.canceled) - self.pd.setModal(True) - self.pd.show() - self.update_progress.connect(self.pd.set_value, - type=Qt.QueuedConnection) - self.update_done.connect(self.pd.hide, type=Qt.QueuedConnection) - - def generate_annotation_html(self, bookmark): - # Returns
...
- last_read_location = bookmark.last_read_location - timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp) - percent_read = bookmark.percent_read - - ka_soup = BeautifulSoup() - dtc = 0 - divTag = Tag(ka_soup,'div') - divTag['class'] = 'user_annotations' - - # Add the last-read location - spanTag = Tag(ka_soup, 'span') - spanTag['style'] = 'font-weight:bold' - if bookmark.book_format == 'pdf': - spanTag.insert(0,NavigableString( - _("%(time)s
Last Page Read: %(loc)d (%(pr)d%%)") % \ - dict(time=strftime(u'%x', timestamp.timetuple()), - loc=last_read_location, - pr=percent_read))) - else: - spanTag.insert(0,NavigableString( - _("%(time)s
Last Page Read: Location %(loc)d (%(pr)d%%)") % \ - dict(time=strftime(u'%x', timestamp.timetuple()), - loc=last_read_location, - pr=percent_read))) - - divTag.insert(dtc, spanTag) - dtc += 1 - divTag.insert(dtc, Tag(ka_soup,'br')) - dtc += 1 - - if bookmark.user_notes: - user_notes = bookmark.user_notes - annotations = [] - - # Add the annotations sorted by location - # Italicize highlighted text - for location in sorted(user_notes): - if user_notes[location]['text']: - annotations.append( - _('Location %(dl)d • %(typ)s
%(text)s
') % \ - dict(dl=user_notes[location]['displayed_location'], - typ=user_notes[location]['type'], - text=(user_notes[location]['text'] if \ - user_notes[location]['type'] == 'Note' else \ - '%s' % user_notes[location]['text']))) - else: - if bookmark.book_format == 'pdf': - annotations.append( - _('Page %(dl)d • %(typ)s
') % \ - dict(dl=user_notes[location]['displayed_location'], - typ=user_notes[location]['type'])) - else: - annotations.append( - _('Location %(dl)d • %(typ)s
') % \ - dict(dl=user_notes[location]['displayed_location'], - typ=user_notes[location]['type'])) - - for annotation in annotations: - divTag.insert(dtc, annotation) - dtc += 1 - - ka_soup.insert(0,divTag) - return ka_soup - - ''' - def mark_book_as_read(self,id): - read_tag = gprefs.get('catalog_epub_mobi_read_tag') - if read_tag: - self.db.set_tags(id, [read_tag], append=True) - ''' - - def canceled(self): - self.pd.hide() - - def run(self): - ignore_tags = set(['Catalog','Clippings']) - for (i, id) in enumerate(self.am): - bm = Device.UserAnnotation(self.am[id][0],self.am[id][1]) - if bm.type == 'kindle_bookmark': - mi = self.db.get_metadata(id, index_is_id=True) - user_notes_soup = self.generate_annotation_html(bm.value) - if mi.comments: - a_offset = mi.comments.find('
') - ad_offset = mi.comments.find('
') - - if a_offset >= 0: - mi.comments = mi.comments[:a_offset] - if ad_offset >= 0: - mi.comments = mi.comments[:ad_offset] - if set(mi.tags).intersection(ignore_tags): - continue - if mi.comments: - hrTag = Tag(user_notes_soup,'hr') - hrTag['class'] = 'annotations_divider' - user_notes_soup.insert(0,hrTag) - - mi.comments += user_notes_soup.prettify() - else: - mi.comments = unicode(user_notes_soup.prettify()) - # Update library comments - self.db.set_comment(id, mi.comments) - - ''' - # Update 'read' tag except for Catalogs/Clippings - if bm.value.percent_read >= self.FINISHED_READING_PCT_THRESHOLD: - if not set(mi.tags).intersection(ignore_tags): - self.mark_book_as_read(id) - ''' - - # Add bookmark file to id - self.db.add_format_with_hooks(id, bm.value.bookmark_extension, - bm.value.path, index_is_id=True) - self.update_progress.emit(i) - elif bm.type == 'kindle_clippings': - # Find 'My Clippings' author=Kindle in database, or add - last_update = 'Last modified %s' % strftime(u'%x %X',bm.value['timestamp'].timetuple()) - mc_id = list(db.data.parse('title:"My Clippings"')) - if mc_id: - do_add_format(self.db, mc_id[0], 'TXT', bm.value['path']) - mi = self.db.get_metadata(mc_id[0], index_is_id=True) - mi.comments = last_update - self.db.set_metadata(mc_id[0], mi) - else: - mi = MetaInformation('My Clippings', authors = ['Kindle']) - mi.tags = ['Clippings'] - mi.comments = last_update - self.db.add_books([bm.value['path']], ['txt'], [mi]) - - self.update_done.emit() - self.done_callback(self.am.keys()) - - # }}} if not job.result: return @@ -254,9 +137,25 @@ class FetchAnnotationsAction(InterfaceAction): _('User annotations generated from main library only'), show=True) db = self.gui.library_view.model().db + device = self.gui.device_manager.device - self.__annotation_updater = Updater(self.gui, db, job.result, - self.Dispatcher(self.gui.library_view.model().refresh_ids)) + self.__annotation_updater = Updater(self.gui, db, device, job.result, + self.Dispatcher(self.annotations_updated)) self.__annotation_updater.start() + def annotations_updated(self, ids, errors): + self.gui.library_view.model().refresh_ids(ids) + if errors: + db = self.gui.library_view.model().db + entries = [] + for id_, tb in errors.iteritems(): + title = id_ + if isinstance(id_, type(1)): + title = db.title(id_, index_is_id=True) + entries.extend([title, tb, '']) + error_dialog(self.gui, _('Some errors'), + _('Could not fetch annotations for some books. Click ' + 'show details to see which ones.'), + det_msg='\n'.join(entries), show=True) +