From 039572d937a77824f91fdac3faec9a60ff782fc5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 7 Dec 2008 18:12:19 -0800 Subject: [PATCH 01/39] Fix #1363 (Error when closing custom news source window) --- src/calibre/gui2/dialogs/user_profiles.py | 5 +++-- upload.py | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/dialogs/user_profiles.py b/src/calibre/gui2/dialogs/user_profiles.py index 005c947a94..5da604c04a 100644 --- a/src/calibre/gui2/dialogs/user_profiles.py +++ b/src/calibre/gui2/dialogs/user_profiles.py @@ -191,7 +191,7 @@ class %(classname)s(%(base_class)s): class Recipe(object): def __init__(self, title, id, recipes): - self.title = title + self.title = unicode(title) self.id = id self.text = recipes[id] def __cmp__(self, other): @@ -202,8 +202,9 @@ class %(classname)s(%(base_class)s): title, ok = QInputDialog.getItem(self, _('Pick recipe'), _('Pick the recipe to customize'), items, 0, False) if ok: + title = unicode(title) for r in recipes: - if r.title == unicode(title): + if r.title == title: try: self.available_profiles.add_item(title, (title, r.text), replace=False) except ValueError: diff --git a/upload.py b/upload.py index d005c6dbbd..a8b3ea2a3f 100644 --- a/upload.py +++ b/upload.py @@ -238,7 +238,11 @@ def stage_three(): print 'Uploading to PyPI...' check_call('rm -f dist/*') check_call('python setup.py register') + check_call('sudo rm -rf build') + os.mkdir('build') check_call('python2.5 setup.py bdist_egg --exclude-source-files upload') + shutil.rmtree('build') + os.mkdir('build') check_call('python setup.py bdist_egg --exclude-source-files upload') check_call('python setup.py sdist upload') upload_src_tarball() From 68f24cacedae01d61133f56dcafd60e235315f0a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 8 Dec 2008 11:37:32 -0800 Subject: [PATCH 02/39] Fix downloaded news in EPUB format causing reader resets --- src/calibre/web/feeds/__init__.py | 12 ++++ src/calibre/web/feeds/news.py | 6 +- src/calibre/web/feeds/recipes/__init__.py | 2 +- src/calibre/web/feeds/recipes/harpers_full.py | 61 +++++++++++++++++++ src/calibre/web/feeds/templates.py | 4 +- 5 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 src/calibre/web/feeds/recipes/harpers_full.py diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index dffb9f8c56..7e1dee577d 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -9,6 +9,7 @@ import time, logging, traceback, copy from datetime import datetime from calibre.web.feeds.feedparser import parse +from lxml import html class Article(object): @@ -19,6 +20,17 @@ class Article(object): self.id = id self.title = title.strip() if title else title self.url = url + if summary and not isinstance(summary, unicode): + summary = summary.decode('utf-8', 'replace') + if summary and '<' in summary: + try: + s = html.fragment_fromstring(summary, create_parent=True) + summary = html.tostring(s, method='text', encoding=unicode) + except: + print 'Failed to process article summary, deleting:' + print summary.encode('utf-8') + traceback.print_exc() + summary = u'' self.summary = summary self.content = content self.date = published diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index ed94ec8270..1bc1fc77d8 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -586,9 +586,9 @@ class BasicNewsRecipe(object, LoggingInterface): if npos < 0: npos = pos ans = src[:npos+1] - if isinstance(ans, unicode): - return ans - return ans+u'\u2026' if isinstance(ans, unicode) else ans + '...' + if len(ans) < len(src): + return ans+u'\u2026' if isinstance(ans, unicode) else ans + '...' + return ans diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 3eabaf532a..6c3c25370f 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -17,7 +17,7 @@ recipe_modules = [ 'blic', 'novosti', 'danas', 'vreme', 'times_online', 'the_scotsman', 'nytimes_sub', 'security_watch', 'cyberpresse', 'st_petersburg_times', 'clarin', 'financial_times', 'heise', 'le_monde', 'harpers', 'science_aas', - 'science_news', 'the_nation', 'lrb' + 'science_news', 'the_nation', 'lrb', 'harpers_full' ] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/harpers_full.py b/src/calibre/web/feeds/recipes/harpers_full.py new file mode 100644 index 0000000000..c87faf195a --- /dev/null +++ b/src/calibre/web/feeds/recipes/harpers_full.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2008, Darko Miletic ' +''' +harpers.org - paid subscription/ printed issue articles +This recipe only get's article's published in text format +images and pdf's are ignored +''' + +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe + +class Harpers_full(BasicNewsRecipe): + title = u"Harper's Magazine - articles from printed edition" + __author__ = u'Darko Miletic' + description = u"Harper's Magazine: Founded June 1850." + oldest_article = 30 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + simultaneous_downloads = 1 + delay = 1 + needs_subscription = True + INDEX = strftime('http://www.harpers.org/archive/%Y/%m') + LOGIN = 'http://www.harpers.org' + cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif') + + keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ] + remove_tags = [ + dict(name='table', attrs={'class':'rcnt'}) + ,dict(name='table', attrs={'class':'rcnt topline'}) + ] + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open(self.LOGIN) + br.select_form(nr=1) + br['handle' ] = self.username + br['password'] = self.password + br.submit() + return br + + def parse_index(self): + articles = [] + print 'Processing ' + self.INDEX + soup = self.index_to_soup(self.INDEX) + for item in soup.findAll('div', attrs={'class':'title'}): + text_link = item.parent.find('img',attrs={'alt':'Text'}) + if text_link: + url = self.LOGIN + item.a['href'] + title = item.a.contents[0] + date = strftime(' %B %Y') + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':'' + }) + return [(soup.head.title.string, articles)] \ No newline at end of file diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py index d1bd593956..b6b0200a66 100644 --- a/src/calibre/web/feeds/templates.py +++ b/src/calibre/web/feeds/templates.py @@ -144,9 +144,9 @@ class FeedTemplate(Template):
  • ${article.title} -

    +

    ${Markup(cutoff(article.summary))} -

    +
  • From a06522c486b97d6268bc268c6970201c441c63a6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 8 Dec 2008 17:30:33 -0800 Subject: [PATCH 03/39] IGN:Add spacing to article list in downloaded feeds --- src/calibre/web/feeds/templates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py index b6b0200a66..4cc79a8b89 100644 --- a/src/calibre/web/feeds/templates.py +++ b/src/calibre/web/feeds/templates.py @@ -141,7 +141,7 @@ class FeedTemplate(Template):