diff --git a/Changelog.yaml b/Changelog.yaml index d3032623a0..9e59c8ae8c 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -20,6 +20,57 @@ # - title: +- version: 0.8.56 + date: 2012-06-15 + + new features: + - title: "Make the new calibre style default on Windows and OS X." + type: major + description: "This change gives a more 'modern' feel to the calibre user interface with focus highlighting, gradients, rounded corners, etc. In case you prefer the old look, you can restore it under Preferences->Look & Feel->User interface style" + + - title: "Get Books: Add the new SONY Reader store" + + - title: "Read metadata from .docx (Microsoft Word) files" + + - title: "Allow customizing the behavior of searching for similar books by right clicking the book. You can now tell calibre to search different columns than the traditional author/series/publisher/tags/etc. in Preferences->Searching" + + - title: "Add option to restore alternating row colors to the Tag Browser under Preferences->Look & Feel->Tag Browser" + + - title: "Update to Qt 4.8.2 on Windows compiled with link time code generation for a small performance boost" + + bug fixes: + - title: "Get Books: Update plugins to handle website changes at ebooks.com, Project Gutenberg, and virtualo" + + - title: "AZW3 Output: Fix TOC at start option not working" + + - title: "AZW3 Output: Close self closing script/style/title/head tags explicitly as they cause problems in webkit based renderers like the Kindle Fire and calibre's viewers." + + - title: "Fix the current_library_name() template function not updating after a library switch" + + - title: "AZW3 Output: Handle the case of a link pointing to the last line of text in the document." 
+ tickets: [1011330] + + - title: "Fix regression in 0.8.55 that broke highlighting of items matching a search in the Tag Browser" + tickets: [1011030] + + - title: "News download: Handle query only relative URLs" + + improved recipes: + - Christian Science Monitor + - Neue Zurcher Zeitung + - Birmingham Post + - Metro UK + - New Musical Express + - The Independent + - The Daily Mirror + - Vreme + - Smithsonian Magazine + + new recipes: + - title: NZZ Webpaper + author: Bernd Leinfelder + + - version: 0.8.55 date: 2012-06-08 diff --git a/recipes/huffingtonpost_uk.recipe b/recipes/huffingtonpost_uk.recipe new file mode 100644 index 0000000000..92e941de5e --- /dev/null +++ b/recipes/huffingtonpost_uk.recipe @@ -0,0 +1,47 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class HindustanTimes(BasicNewsRecipe): + title = u'Huffington Post UK' + language = 'en_GB' + __author__ = 'Krittika Goyal' + oldest_article = 2 #days + max_articles_per_feed = 25 + #encoding = 'cp1252' + use_embedded_content = False + + no_stylesheets = True + auto_cleanup = True + auto_cleanup_keep = '//div[@class="articleBody"]' + + feeds = [ +('UK Politics', + 'http://www.huffingtonpost.com/feeds/verticals/uk-politics/news.xml'), +('UK Entertainment', + 'http://www.huffingtonpost.com/feeds/verticals/uk-entertainment/news.xml'), +('UK Style', + 'http://www.huffingtonpost.com/feeds/verticals/uk-style/news.xml'), +('UK Fashion:', + 'http://www.huffingtonpost.com/feeds/verticals/uk-fashion/news.xml'), +('UK Universities:', + 'http://www.huffingtonpost.com/feeds/verticals/uk-universities-education/news.xml'), +('UK World', + 'http://www.huffingtonpost.com/feeds/verticals/uk-world/news.xml'), +('UK Lifestyle', + 'http://www.huffingtonpost.com/feeds/verticals/uk-lifestyle/news.xml'), +('UK Comedy', + 'http://www.huffingtonpost.com/feeds/verticals/uk-comedy/news.xml'), +('UK Celebrity', + 'http://www.huffingtonpost.com/feeds/verticals/uk-celebrity/news.xml'), +('UK Culture', + 
'http://www.huffingtonpost.com/feeds/verticals/uk-culture/news.xml'), +('UK News', + 'http://www.huffingtonpost.com/feeds/verticals/uk/news.xml'), +('UK Tech', + 'http://www.huffingtonpost.com/feeds/verticals/uk-tech/news.xml'), +('UK Sport', + 'http://www.huffingtonpost.com/feeds/verticals/uk-sport/news.xml'), + ] + def get_article_url(self, entry): + if entry.links: + return entry.links[0]['href'] + return BasicNewsRecipe.get_article_url(self, entry) diff --git a/recipes/smith.recipe b/recipes/smith.recipe index 06075b8d1b..8bf60a227a 100644 --- a/recipes/smith.recipe +++ b/recipes/smith.recipe @@ -1,26 +1,42 @@ +import re from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulSoup class SmithsonianMagazine(BasicNewsRecipe): title = u'Smithsonian Magazine' language = 'en' - __author__ = 'Krittika Goyal' + __author__ = 'Krittika Goyal and TerminalVeracity' oldest_article = 31#days max_articles_per_feed = 50 use_embedded_content = False - #encoding = 'latin1' recursions = 1 + cover_url = 'http://sphotos.xx.fbcdn.net/hphotos-snc7/431147_10150602715983253_764313347_n.jpg' match_regexps = ['&page=[2-9]$'] + preprocess_regexps = [ + (re.compile(r'for more of Smithsonian\'s coverage on history, science and nature.', re.DOTALL), lambda m: '') + ] + extra_css = """ + h1{font-size: large; margin: .2em 0} + h2{font-size: medium; margin: .2em 0} + h3{font-size: medium; margin: .2em 0} + #byLine{margin: .2em 0} + .articleImageCaptionwide{font-style: italic} + .wp-caption-text{font-style: italic} + img{display: block} + """ + remove_stylesheets = True - #remove_tags_before = dict(name='h1', attrs={'class':'heading'}) - remove_tags_after = dict(name='p', attrs={'id':'articlePaginationWrapper'}) + remove_tags_after = dict(name='div', attrs={'class':['post','articlePaginationWrapper']}) remove_tags = [ dict(name='iframe'), - dict(name='div', attrs={'class':'article_sidebar_border'}), - dict(name='div', 
attrs={'id':['article_sidebar_border', 'most-popular_large', 'most-popular-body_large']}), - ##dict(name='ul', attrs={'class':'article-tools'}), + dict(name='div', attrs={'class':['article_sidebar_border','viewMorePhotos','addtoany_share_save_container','meta','social','OUTBRAIN','related-articles-inpage']}), + dict(name='div', attrs={'id':['article_sidebar_border', 'most-popular_large', 'most-popular-body_large','comment_section','article-related']}), dict(name='ul', attrs={'class':'cat-breadcrumb col three last'}), + dict(name='h4', attrs={'id':'related-topics'}), + dict(name='table'), + dict(name='a', attrs={'href':['/subArticleBottomWeb','/subArticleTopWeb','/subArticleTopMag','/subArticleBottomMag']}), + dict(name='a', attrs={'name':'comments_shaded'}), ] @@ -39,15 +55,7 @@ class SmithsonianMagazine(BasicNewsRecipe): def preprocess_html(self, soup): story = soup.find(name='div', attrs={'id':'article-body'}) - ##td = heading.findParent(name='td') - ##td.extract() soup = BeautifulSoup('