From 09590d9942ef3d2cb81e2e4c4577798aa6e00bd1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 7 Oct 2010 12:52:50 -0600 Subject: [PATCH 1/6] Fix #7076 (The Times Online Empty Articles + Login/Password Fields Missing) --- resources/recipes/times_online.recipe | 181 +++++++++++++------------- 1 file changed, 92 insertions(+), 89 deletions(-) diff --git a/resources/recipes/times_online.recipe b/resources/recipes/times_online.recipe index a57749c79d..1ae8789cd5 100644 --- a/resources/recipes/times_online.recipe +++ b/resources/recipes/times_online.recipe @@ -1,103 +1,106 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2009-2010, Darko Miletic ' ''' -timesonline.co.uk +www.thetimes.co.uk ''' -import re - +import urllib from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag -class Timesonline(BasicNewsRecipe): - title = 'The Times Online' - __author__ = 'Darko Miletic and Sujata Raman' - description = 'UK news' - publisher = 'timesonline.co.uk' - category = 'news, politics, UK' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - simultaneous_downloads = 1 - encoding = 'ISO-8859-1' - remove_javascript = True - language = 'en_GB' - recursions = 9 - match_regexps = [r'http://www.timesonline.co.uk/.*page=[2-9]'] +class TimesOnline(BasicNewsRecipe): + title = 'The Times UK' + __author__ = 'Darko Miletic' + description = 'news from United Kingdom and World' + language = 'en_GB' + publisher = 'Times Newspapers Ltd' + category = 'news, politics, UK' + oldest_article = 3 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + delay = 1 + needs_subscription = True + publication_type = 'newspaper' + masthead_url = 'http://www.thetimes.co.uk/tto/public/img/the_times_460.gif' + INDEX = 'http://www.thetimes.co.uk' + PREFIX = u'http://www.thetimes.co.uk/tto/' + extra_css = """ + .f-ha{font-size: xx-large; font-weight: bold} + .f-author{font-family: Arial,Helvetica,sans-serif} + .caption{font-size: small} + body{font-family: Georgia,"Times New Roman",Times,serif} + """ + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } - preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] - keep_only_tags = [ - dict(name='div', attrs= {'id':['region-column1and2-layout2']}), - {'class' : ['subheading']}, - dict(name='div', attrs= {'id':['dynamic-image-holder']}), - dict(name='div', attrs= {'class':['article-author']}), - dict(name='div', attrs= {'id':['related-article-links']}), + def get_browser(self): + br = BasicNewsRecipe.get_browser() + br.open('http://www.timesplus.co.uk/tto/news/?login=false&url=http://www.thetimes.co.uk/tto/news/?lightbox=false') + if self.username is not None and self.password is not None: + data = urllib.urlencode({ 'userName':self.username + ,'password':self.password + ,'keepMeLoggedIn':'false' + }) + br.open('https://www.timesplus.co.uk/iam/app/authenticate',data) + return br + + remove_tags = [ + dict(name=['object','link','iframe','base','meta']) + ,dict(attrs={'class':'tto-counter' }) ] + remove_attributes=['lang'] + keep_only_tags = [ + dict(attrs={'class':'heading' }) + ,dict(attrs={'class':'f-author'}) + ,dict(attrs={'id':'bodycopy'}) + ] - remove_tags = [ - dict(name=['embed','object','form','iframe']), - dict(name='span', attrs = {'class':'float-left padding-left-8 padding-top-2'}), - dict(name='div', attrs= {'id':['region-footer','region-column2-layout2','grid-column4','login-status','comment-sort-order']}), - dict(name='div', attrs= {'class': ['debate-quote-container','clear','your-comment','float-left related-attachements-container','float-left padding-bottom-5 padding-top-8','puff-top']}), - dict(name='span', attrs = {'id': ['comment-count']}), - dict(name='ul',attrs = {'id': 'read-all-comments'}), - dict(name='a', attrs = {'class':'reg-bold'}), - ] - - - extra_css = ''' - .small{font-family :Arial,Helvetica,sans-serif; font-size:x-small;} - .byline{font-family :Arial,Helvetica,sans-serif; font-size:x-small; background:#F8F1D8;} - .color-666{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666; } - h1{font-family:Georgia,Times New Roman,Times,serif;font-size:large; } - .color-999 {color:#999999;} - .x-small {font-size:x-small;} - #related-article-links{font-family :Arial,Helvetica,sans-serif; font-size:small;} - h2{color:#333333;font-family :Georgia,Times New Roman,Times,serif; font-size:small;} - p{font-family :Arial,Helvetica,sans-serif; font-size:small;} - ''' - - feeds = [ - (u'Top stories from Times Online', u'http://www.timesonline.co.uk/tol/feeds/rss/topstories.xml' ), - ('Latest Business News', 'http://www.timesonline.co.uk/tol/feeds/rss/business.xml'), - ('Economics', 'http://www.timesonline.co.uk/tol/feeds/rss/economics.xml'), - ('World News', 'http://www.timesonline.co.uk/tol/feeds/rss/worldnews.xml'), - ('UK News', 'http://www.timesonline.co.uk/tol/feeds/rss/uknews.xml'), - ('Travel News', 'http://www.timesonline.co.uk/tol/feeds/rss/travel.xml'), - ('Sports News', 'http://www.timesonline.co.uk/tol/feeds/rss/sport.xml'), - ('Film News', 'http://www.timesonline.co.uk/tol/feeds/rss/film.xml'), - ('Tech news', 'http://www.timesonline.co.uk/tol/feeds/rss/tech.xml'), - ('Literary Supplement', 'http://www.timesonline.co.uk/tol/feeds/rss/thetls.xml'), - ] - - def get_cover_url(self): - cover_url = None - index = 'http://www.timesonline.co.uk/tol/newspapers/' - soup = self.index_to_soup(index) - link_item = soup.find(name = 'div',attrs ={'class': "float-left margin-right-15"}) - if link_item: - cover_url = link_item.img['src'] - return cover_url - - def get_article_url(self, article): - return article.get('guid', None) - + feeds = [ + (u'UK News' , PREFIX + u'news/uk/?view=list' ) + ,(u'World' , PREFIX + u'news/world/?view=list' ) + ,(u'Politics' , PREFIX + u'news/politics/?view=list') + ,(u'Health' , PREFIX + u'health/news/?view=list' ) + ,(u'Education' , PREFIX + u'education/?view=list' ) + ,(u'Technology' , PREFIX + u'technology/?view=list' ) + ,(u'Science' , PREFIX + u'science/?view=list' ) + ,(u'Environment' , PREFIX + u'environment/?view=list' ) + ,(u'Faith' , PREFIX + u'faith/?view=list' ) + ,(u'Opinion' , PREFIX + u'opinion/?view=list' ) + ,(u'Sport' , PREFIX + u'sport/?view=list' ) + ,(u'Business' , PREFIX + u'business/?view=list' ) + ,(u'Money' , PREFIX + u'money/?view=list' ) + ,(u'Life' , PREFIX + u'life/?view=list' ) + ,(u'Arts' , PREFIX + u'arts/?view=list' ) + ] def preprocess_html(self, soup): - soup.html['xml:lang'] = self.language - soup.html['lang'] = self.language - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.language)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=ISO-8859-1")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) + for item in soup.findAll(style=True): + del item['style'] return self.adeify_images(soup) - def postprocess_html(self,soup,first): - for tag in soup.findAll(text = ['Previous Page','Next Page']): - tag.extract() - return soup - - + def parse_index(self): + totalfeeds = [] + lfeeds = self.get_feeds() + for feedobj in lfeeds: + feedtitle, feedurl = feedobj + self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + articles = [] + soup = self.index_to_soup(feedurl) + for item in soup.findAll('td', attrs={'class':'title'}): + atag = item.find('a') + url = self.INDEX + atag['href'] + title = self.tag_to_string(atag) + articles.append({ + 'title' :title + ,'date' :'' + ,'url' :url + ,'description':'' + }) + totalfeeds.append((feedtitle, articles)) + return totalfeeds From c47cdad1fc45c0cf34b49fe404be8a42106c7fe9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 7 Oct 2010 12:57:33 -0600 Subject: [PATCH 2/6] Sigh --- src/calibre/gui2/actions/choose_library.py | 14 ++++++++++---- src/calibre/gui2/layout.py | 18 ++++++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/calibre/gui2/actions/choose_library.py b/src/calibre/gui2/actions/choose_library.py index 39f7159989..044cbcdf85 100644 --- a/src/calibre/gui2/actions/choose_library.py +++ b/src/calibre/gui2/actions/choose_library.py @@ -147,26 +147,32 @@ class ChooseLibraryAction(InterfaceAction): self.qs_locations = [i[1] for i in locations] self.rename_menu.clear() self.delete_menu.clear() - quick_actions = [] + quick_actions, rename_actions, delete_actions = [], [], [] for name, loc in locations: ac = self.quick_menu.addAction(name, Dispatcher(partial(self.switch_requested, loc))) quick_actions.append(ac) - self.rename_menu.addAction(name, Dispatcher(partial(self.rename_requested, + ac = self.rename_menu.addAction(name, Dispatcher(partial(self.rename_requested, name, loc))) - self.delete_menu.addAction(name, Dispatcher(partial(self.delete_requested, + rename_actions.append(ac) + ac = self.delete_menu.addAction(name, Dispatcher(partial(self.delete_requested, name, loc))) + delete_actions.append(ac) + qs_actions = [] for i, x in enumerate(locations[:len(self.switch_actions)]): name, loc = x ac = self.switch_actions[i] ac.setText(name) ac.setVisible(True) + qs_actions.append(ac) self.quick_menu_action.setVisible(bool(locations)) self.rename_menu_action.setVisible(bool(locations)) self.delete_menu_action.setVisible(bool(locations)) - self.gui.location_manager.set_switch_actions(quick_actions) + self.gui.location_manager.set_switch_actions(quick_actions, + rename_actions, delete_actions, qs_actions, + self.action_choose) def location_selected(self, loc): diff --git a/src/calibre/gui2/layout.py b/src/calibre/gui2/layout.py index 0cd93f388c..89d763ef4c 100644 --- a/src/calibre/gui2/layout.py +++ b/src/calibre/gui2/layout.py @@ -24,7 +24,6 @@ class LocationManager(QObject): # {{{ locations_changed = pyqtSignal() unmount_device = pyqtSignal() location_selected = pyqtSignal(object) - switch_actions_set = pyqtSignal(object) def __init__(self, parent=None): QObject.__init__(self, parent) @@ -70,12 +69,23 @@ class LocationManager(QObject): # {{{ ac('cardb', _('Card B'), 'sd.png', _('Show books in storage card B')) - def set_switch_actions(self, actions): + def set_switch_actions(self, quick_actions, rename_actions, delete_actions, + switch_actions, choose_action): self.switch_menu = QMenu() - for ac in actions: + self.switch_menu.addAction(choose_action) + self.cs_menus = [] + for t, acs in [(_('Quick switch'), quick_actions), + (_('Rename library'), rename_actions), + (_('Delete library'), delete_actions)]: + if acs: + self.cs_menus.append(QMenu(t)) + for ac in acs: + self.cs_menus[-1].addAction(ac) + self.switch_menu.addMenu(self.cs_menus[-1]) + self.switch_menu.addSeparator() + for ac in switch_actions: self.switch_menu.addAction(ac) self.library_action.setMenu(self.switch_menu) - self.switch_actions_set.emit(bool(actions)) def _location_selected(self, location, *args): if location != self.current_location and hasattr(self, From bd7e82b124af6b39982b6d154ffa645277c51657 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 7 Oct 2010 13:06:35 -0600 Subject: [PATCH 3/6] Il Fatto Quotidiano by egilh. Fixes #405 (New news feed) --- resources/recipes/il_fatto.recipe | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 resources/recipes/il_fatto.recipe diff --git a/resources/recipes/il_fatto.recipe b/resources/recipes/il_fatto.recipe new file mode 100644 index 0000000000..69ad645b94 --- /dev/null +++ b/resources/recipes/il_fatto.recipe @@ -0,0 +1,30 @@ + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1286477122(BasicNewsRecipe): + title = u'Il Fatto Quotidiano' + oldest_article = 7 + max_articles_per_feed = 25 + language = 'it' + __author__ = 'egilh' + + feeds = [ + (u'Politica & Palazzo', u'http://www.ilfattoquotidiano.it/category/politica-palazzo/feed/'), + (u'Giustizia & impunit\xe0', u'http://www.ilfattoquotidiano.it/category/giustizia-impunita/feed/'), + (u'Media & regime', u'http://www.ilfattoquotidiano.it/category/media-regime/feed/'), + (u'Economia & Lobby', u'http://www.ilfattoquotidiano.it/category/economia-lobby/feed/'), + (u'Lavoro & precari', u'http://www.ilfattoquotidiano.it/category/lavoro-precari/feed/'), + (u'Ambiente & Veleni', u'http://www.ilfattoquotidiano.it/category/ambiente-veleni/feed/'), + (u'Sport & miliardi', u'http://www.ilfattoquotidiano.it/category/sport-miliardi/feed/'), + (u'Cronaca', u'http://www.ilfattoquotidiano.it/category/cronaca/feed/'), + (u'Mondo', u'http://www.ilfattoquotidiano.it/category/mondo/feed/'), + (u'Societ\xe0', u'http://www.ilfattoquotidiano.it/category/societa/feed/'), + (u'Scuola', u'http://www.ilfattoquotidiano.it/category/scuola/feed/'), + (u'Tecno', u'http://www.ilfattoquotidiano.it/category/tecno/feed/'), + (u'Terza pagina', u'http://www.ilfattoquotidiano.it/category/terza-pagina/feed/'), + (u'Piacere quotidiano', u'http://www.ilfattoquotidiano.it/category/piacere-quotidiano/feed/'), + (u'Cervelli in fuga', u'http://www.ilfattoquotidiano.it/category/cervelli-in-fuga/feed/'), + (u'Documentati!', u'http://www.ilfattoquotidiano.it/category/documentati/feed/'), + (u'Misfatto', u'http://www.ilfattoquotidiano.it/category/misfatto/feed/') +] + From fe6816282fd987aa8963ccf57f958fe462f42b22 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 7 Oct 2010 14:29:32 -0600 Subject: [PATCH 4/6] Fix #7071 (Download of metadata for multiple books has strange behaivour) --- src/calibre/devices/apple/driver.py | 1 - src/calibre/gui2/metadata.py | 10 ++++++++-- src/calibre/library/cli.py | 1 - 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index 0fd2bbcc2c..2ffe6399e4 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -19,7 +19,6 @@ from calibre.ebooks.metadata.epub import set_metadata from calibre.library.server.utils import strftime from calibre.utils.config import config_dir, prefs from calibre.utils.date import isoformat, now, parse_date -from calibre.utils.localization import get_lang from calibre.utils.logging import Log from calibre.utils.zipfile import ZipFile diff --git a/src/calibre/gui2/metadata.py b/src/calibre/gui2/metadata.py index b11e2ad28a..45cda50433 100644 --- a/src/calibre/gui2/metadata.py +++ b/src/calibre/gui2/metadata.py @@ -144,10 +144,10 @@ class DownloadMetadata(Thread): def commit_covers(self, all=False): if all: - self.worker.jobs.put(False) + self.worker.jobs.put((False, False)) while True: try: - id, fmi, ok, cdata = self.worker.results.get(False) + id, fmi, ok, cdata = self.worker.results.get_nowait() if ok: self.fetched_covers[id] = cdata self.results.put((id, 'cover', ok, fmi.title)) @@ -210,6 +210,12 @@ class DoDownload(QObject): pass if not self.downloader.is_alive(): self.timer.stop() + while True: + try: + r = self.downloader.results.get_nowait() + self.handle_result(r) + except Empty: + break self.pd.accept() def handle_result(self, r): diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index c1a1109cf8..a11d81cc8c 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -1010,7 +1010,6 @@ def command_restore_database(args, dbpath): 'saved to', name) def list_categories_option_parser(): - from calibre.library.check_library import CHECKS parser = get_parser(_('''\ %prog list_categories [options] From ea83fd852d0d5f6b54cbcabf1dbe2897d933da3f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 7 Oct 2010 14:51:38 -0600 Subject: [PATCH 5/6] Remove cover_data from SERIALIZABLE_FIELDS --- src/calibre/ebooks/metadata/book/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/book/__init__.py b/src/calibre/ebooks/metadata/book/__init__.py index 2da0d1b8fb..178100b433 100644 --- a/src/calibre/ebooks/metadata/book/__init__.py +++ b/src/calibre/ebooks/metadata/book/__init__.py @@ -124,5 +124,6 @@ SERIALIZABLE_FIELDS = SOCIAL_METADATA_FIELDS.union( PUBLICATION_METADATA_FIELDS).union( CALIBRE_METADATA_FIELDS).union( DEVICE_METADATA_FIELDS) - \ - frozenset(['device_collections', 'formats']) + frozenset(['device_collections', 'formats', + 'cover_data']) # these are rebuilt when needed From 257a82f06bf3728db65da63e55758c975d7711a9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 7 Oct 2010 14:58:13 -0600 Subject: [PATCH 6/6] ... --- src/calibre/ebooks/metadata/book/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/book/__init__.py b/src/calibre/ebooks/metadata/book/__init__.py index 178100b433..82de7400d7 100644 --- a/src/calibre/ebooks/metadata/book/__init__.py +++ b/src/calibre/ebooks/metadata/book/__init__.py @@ -101,7 +101,6 @@ STANDARD_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union( CALIBRE_METADATA_FIELDS) # Metadata fields that smart update must do special processing to copy. - SC_FIELDS_NOT_COPIED = frozenset(['title', 'title_sort', 'authors', 'author_sort', 'author_sort_map', 'cover_data', 'tags', 'language',