From 48b98f4a043a73bab964d95ea12d6fb324ab2793 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 22 Jan 2010 08:35:58 -0700 Subject: [PATCH 1/4] ... --- src/calibre/translations/nb.po | 4 ++-- src/calibre/translations/pl.po | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/calibre/translations/nb.po b/src/calibre/translations/nb.po index 7f52ea8e4b..eb19aad59b 100644 --- a/src/calibre/translations/nb.po +++ b/src/calibre/translations/nb.po @@ -7652,7 +7652,7 @@ msgstr "" "Tilgjengelige områder: alle, forfattersortering, forfattere, kommentarer, " "omslagsbilde, formater, id, isbn, publikasjonsdato, bedømmelse, " "serieinndeks, serier, størrelse, tidsmerker, tittel, uuid.\n" -"Standard: %standard\n" +"Standard: %default\n" "Gjelder: CSV, XML utdataformater" #: /home/kovid/work/calibre/src/calibre/library/catalog.py:34 @@ -7665,7 +7665,7 @@ msgstr "" "Utdatafelter som kan sorteres.\n" "Tilgjengelige områder: forfattersortering, id, bedømmelse, størrelse, " "tidsmerking, tittel.\n" -"Standard: %standard'\n" +"Standard: '%default'\n" "Gjelder: CSV, XML utdataformater" #: /home/kovid/work/calibre/src/calibre/library/cli.py:121 diff --git a/src/calibre/translations/pl.po b/src/calibre/translations/pl.po index 9954a21192..6411f90a9c 100644 --- a/src/calibre/translations/pl.po +++ b/src/calibre/translations/pl.po @@ -604,7 +604,7 @@ msgstr "Wykrycie dysku %s niemożliwe. Spróbuj ponownie uruchomić komputer." #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:429 msgid "Unable to detect the %s mount point. Try rebooting." -msgstr "Nie można wykryć % s punkt montowania. Spróbuj zrestartować system." +msgstr "Nie można wykryć %s punkt montowania. Spróbuj zrestartować system." #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:484 msgid "Unable to detect the %s disk drive." @@ -718,7 +718,7 @@ msgid "" msgstr "" "Nie udało się proces komiks: \n" "\n" -"% s" +"%s" #: /home/kovid/work/calibre/src/calibre/ebooks/comic/input.py:278 msgid "" From 427061d5b8dc182e8f94df3082ff4381251f1d00 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 22 Jan 2010 09:27:49 -0700 Subject: [PATCH 2/4] Fix #4635 (ARS Technica feed returns TOC but no content) --- Changelog.yaml | 1 + resources/recipes/ars_technica.recipe | 51 ++++++++++++++++----------- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/Changelog.yaml b/Changelog.yaml index eb221f0213..8399532bb1 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -127,6 +127,7 @@ - FTD - The National Post - Blic + - Ars Technica - version: 0.6.34 diff --git a/resources/recipes/ars_technica.recipe b/resources/recipes/ars_technica.recipe index e5b54edc03..717a47dd0c 100644 --- a/resources/recipes/ars_technica.recipe +++ b/resources/recipes/ars_technica.recipe @@ -1,12 +1,12 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' arstechnica.com ''' from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class ArsTechnica2(BasicNewsRecipe): title = u'Ars Technica' @@ -18,24 +18,24 @@ class ArsTechnica2(BasicNewsRecipe): oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True - encoding = 'utf8' - remove_javascript = True + encoding = 'utf-8' use_embedded_content = False + extra_css = ' body {font-family: sans-serif} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} ' - extra_css = ''' - .news-item-title{font-size: medium ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;} - .news-item-teaser{font-size: small ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;} - .news-item-byline{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;} - .news-item-text{font-size:x-small;font-family:Arial,Helvetica,sans-serif;} - .news-item-figure-caption-text{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:bold;} - .news-item-figure-caption-byline{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;} - ''' + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } - keep_only_tags = [dict(name='div', attrs={'id':['news-item-info','news-item']})] + + + keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})] remove_tags = [ dict(name=['object','link','embed']) - ,dict(name='div', attrs={'class':'related-stories'}) + ,dict(name='div', attrs={'class':'read-more-link'}) ] @@ -52,14 +52,19 @@ class ArsTechnica2(BasicNewsRecipe): ] def append_page(self, soup, appendtag, position): - pager = soup.find('div',attrs={'id':'pager'}) + pager = soup.find('div',attrs={'class':'pager'}) if pager: for atag in pager.findAll('a',href=True): str = self.tag_to_string(atag) if str.startswith('Next'): - soup2 = self.index_to_soup(atag['href']) - - texttag = soup2.find('div', attrs={'class':'news-item-text'}) + nurl = 'http://arstechnica.com' + atag['href'] + rawc = self.index_to_soup(nurl,True) + soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding) + + readmoretag = soup2.find('div', attrs={'class':'read-more-link'}) + if readmoretag: + readmoretag.extract() + texttag = soup2.find('div', attrs={'class':'body'}) for it in texttag.findAll(style=True): del it['style'] @@ -71,10 +76,12 @@ class ArsTechnica2(BasicNewsRecipe): def preprocess_html(self, soup): - - ftag = soup.find('div', attrs={'class':'news-item-byline'}) + ftag = soup.find('div', attrs={'class':'byline'}) if ftag: - ftag.insert(4,'

') + brtag = Tag(soup,'br') + brtag2 = Tag(soup,'br') + ftag.insert(4,brtag) + ftag.insert(5,brtag2) for item in soup.findAll(style=True): del item['style'] @@ -83,5 +90,7 @@ class ArsTechnica2(BasicNewsRecipe): return soup + def get_article_url(self, article): + return article.get('feedburner_origlink', None).rpartition('?')[0] From 801a40ff23fbf805ad8dab7b30edc35516f7c140 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 22 Jan 2010 10:26:05 -0700 Subject: [PATCH 3/4] New recipe for Editor and Publisher by XanthanGum --- Changelog.yaml | 2 ++ resources/recipes/editor_and_publisher.recipe | 34 +++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 resources/recipes/editor_and_publisher.recipe diff --git a/Changelog.yaml b/Changelog.yaml index 8399532bb1..3d64dd5e4a 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -119,6 +119,8 @@ - title: stuff.co.nz author: Krittika Goyal + - title: Editor and Publisher + author: XanthanGum improved recipes: - Physics Today diff --git a/resources/recipes/editor_and_publisher.recipe b/resources/recipes/editor_and_publisher.recipe new file mode 100644 index 0000000000..e8a42e1f95 --- /dev/null +++ b/resources/recipes/editor_and_publisher.recipe @@ -0,0 +1,34 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe +class EandP(BasicNewsRecipe): + title = u'Editor and Publisher' + __author__ = u'Xanthan Gum' + description = 'News about newspapers and journalism.' + language = 'en' + no_stylesheets = True + + oldest_article = 7 + max_articles_per_feed = 100 + + # Font formatting code borrowed from kwetal + + extra_css = ''' + body{font-family:verdana,arial,helvetica,geneva,sans-serif ;} + h1{font-size: xx-large;} + h2{font-size: large;} + ''' + + # Delete everything before the article + + remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'}) + + # Delete everything after the article + + preprocess_regexps = [(re.compile(r'.*', re.DOTALL|re.IGNORECASE), + lambda match: ''),] + + feeds = [(u'Breaking News', u'http://feeds.feedburner.com/EditorAndPublisher-BreakingNews'), + (u'Business News', u'http://feeds.feedburner.com/EditorAndPublisher-BusinessNews'), + (u'Newsroom', u'http://feeds.feedburner.com/EditorAndPublisher-Newsroom'), + (u'Technology News', u'http://feeds.feedburner.com/EditorAndPublisher-Technology'), + (u'Syndicates News', u'http://feeds.feedburner.com/EditorAndPublisher-Syndicates')] From 3a1d39422ac4642a28b8eeea7ffbaa191ef601ce Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 22 Jan 2010 11:42:23 -0700 Subject: [PATCH 4/4] ... --- src/calibre/gui2/catalog/catalog_epub_mobi.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/calibre/gui2/catalog/catalog_epub_mobi.py b/src/calibre/gui2/catalog/catalog_epub_mobi.py index 0865367109..85f728552a 100644 --- a/src/calibre/gui2/catalog/catalog_epub_mobi.py +++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py @@ -40,8 +40,6 @@ class PluginWidget(QWidget,Ui_Form): def options(self): # Save/return the current options - # getattr() returns text value of QLineEdit control - print "gui2.catalog.catalog_epub_mobi:options(): Saving options" opts_dict = {} for opt in self.OPTION_FIELDS: opt_value = unicode(getattr(self, opt[0]).text())