diff --git a/resources/recipes/people_us_mashup.recipe b/resources/recipes/people_us_mashup.recipe index 2631ac5184..38d750cd4c 100644 --- a/resources/recipes/people_us_mashup.recipe +++ b/resources/recipes/people_us_mashup.recipe @@ -1,93 +1,93 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -''' -''' -from calibre.web.feeds.recipes import BasicNewsRecipe - -class PeopleMag(BasicNewsRecipe): - - title = 'People/US Magazine Mashup' - __author__ = 'BrianG' - language = 'en' - description = 'Headlines from People and US Magazine' - no_stylesheets = True - use_embedded_content = False - oldest_article = 2 - max_articles_per_feed = 50 - - extra_css = ''' - h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;} - h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;} - .body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;} - .byline {font-size: small; color: #666666; font-style:italic; } - .lastline {font-size: small; color: #666666; font-style:italic;} - .contact {font-size: small; color: #666666;} - .contact p {font-size: small; color: #666666;} - .photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;} - .photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;} - .article_timestamp{font-size:x-small; color:#666666;} - a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;} - ''' - - - keep_only_tags = [ - dict(name='div', attrs={'class': 'panel_news_article_main'}), - dict(name='div', attrs={'class':'article_content'}), - dict(name='div', attrs={'class': 'headline'}), - dict(name='div', attrs={'class': 'post'}), - dict(name='div', attrs={'class': 'packageheadlines'}), - dict(name='div', attrs={'class': 'snap_preview'}), - dict(name='div', attrs={'id': 'articlebody'}) - ] - - remove_tags = [ - dict(name='div', attrs={'class':'share_comments'}), - dict(name='p', attrs={'class':'twitter_facebook'}), - dict(name='div', attrs={'class':'share_comments_bottom'}), - dict(name='h2', attrs={'id':'related_content'}), - dict(name='div', attrs={'class':'next_article'}), - dict(name='div', attrs={'class':'prev_article'}), - dict(name='ul', attrs={'id':'sharebar'}), - dict(name='div', attrs={'class':'sharelinkcont'}), - dict(name='div', attrs={'class':'categories'}), - dict(name='ul', attrs={'class':'categories'}), - dict(name='div', attrs={'id':'promo'}), - dict(name='div', attrs={'class':'linksWrapper'}), - dict(name='p', attrs={'class':'tag tvnews'}), - dict(name='p', attrs={'class':'tag movienews'}), - dict(name='p', attrs={'class':'tag musicnews'}), - dict(name='p', attrs={'class':'tag couples'}), - dict(name='p', attrs={'class':'tag gooddeeds'}), - dict(name='p', attrs={'class':'tag weddings'}), - dict(name='p', attrs={'class':'tag health'}) -] - - - feeds = [ - ('PEOPLE Headlines', 'http://feeds.people.com/people/headlines'), - ('US Headlines', 'http://www.usmagazine.com/celebrity_news/rss') - ] - - def get_article_url(self, article): - ans = article.link - - try: - self.log('Looking for full story link in', ans) - soup = self.index_to_soup(ans) - x = soup.find(text="View All") - - if x is not None: - ans = ans + '?viewAll=y' - self.log('Found full story link', ans) - except: - pass - return ans - - def postprocess_html(self, soup,first): - - for tag in soup.findAll(name='div',attrs={'class':"container_ate_qandatitle"}): - tag.extract() - for tag in soup.findAll(name='br'): - tag.extract() - - return soup +#!/usr/bin/env python +__license__ = 'GPL v3' +''' +''' +from calibre.web.feeds.recipes import BasicNewsRecipe + +class PeopleMag(BasicNewsRecipe): + + title = 'People/US Magazine Mashup' + __author__ = 'BrianG' + language = 'en' + description = 'Headlines from People and US Magazine' + no_stylesheets = True + use_embedded_content = False + oldest_article = 2 + max_articles_per_feed = 50 + + extra_css = ''' + h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;} + h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;} + .body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;} + .byline {font-size: small; color: #666666; font-style:italic; } + .lastline {font-size: small; color: #666666; font-style:italic;} + .contact {font-size: small; color: #666666;} + .contact p {font-size: small; color: #666666;} + .photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;} + .photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;} + .article_timestamp{font-size:x-small; color:#666666;} + a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;} + ''' + + + keep_only_tags = [ + dict(name='div', attrs={'class': 'panel_news_article_main'}), + dict(name='div', attrs={'class':'article_content'}), + dict(name='div', attrs={'class': 'headline'}), + dict(name='div', attrs={'class': 'post'}), + dict(name='div', attrs={'class': 'packageheadlines'}), + dict(name='div', attrs={'class': 'snap_preview'}), + dict(name='div', attrs={'id': 'articlebody'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':'share_comments'}), + dict(name='p', attrs={'class':'twitter_facebook'}), + dict(name='div', attrs={'class':'share_comments_bottom'}), + dict(name='h2', attrs={'id':'related_content'}), + dict(name='div', attrs={'class':'next_article'}), + dict(name='div', attrs={'class':'prev_article'}), + dict(name='ul', attrs={'id':'sharebar'}), + dict(name='div', attrs={'class':'sharelinkcont'}), + dict(name='div', attrs={'class':'categories'}), + dict(name='ul', attrs={'class':'categories'}), + dict(name='div', attrs={'id':'promo'}), + dict(name='div', attrs={'class':'linksWrapper'}), + dict(name='p', attrs={'class':'tag tvnews'}), + dict(name='p', attrs={'class':'tag movienews'}), + dict(name='p', attrs={'class':'tag musicnews'}), + dict(name='p', attrs={'class':'tag couples'}), + dict(name='p', attrs={'class':'tag gooddeeds'}), + dict(name='p', attrs={'class':'tag weddings'}), + dict(name='p', attrs={'class':'tag health'}) +] + + + feeds = [ + ('PEOPLE Headlines', 'http://feeds.people.com/people/headlines'), + ('US Headlines', 'http://www.usmagazine.com/celebrity_news/rss') + ] + + def get_article_url(self, article): + ans = article.link + + try: + self.log('Looking for full story link in', ans) + soup = self.index_to_soup(ans) + x = soup.find(text="View All") + + if x is not None: + ans = ans + '?viewAll=y' + self.log('Found full story link', ans) + except: + pass + return ans + + def postprocess_html(self, soup,first): + + for tag in soup.findAll(name='div',attrs={'class':"container_ate_qandatitle"}): + tag.extract() + for tag in soup.findAll(name='br'): + tag.extract() + + return soup diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py index dfff213be4..aa69ba446b 100644 --- a/src/calibre/ebooks/mobi/mobiml.py +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -245,7 +245,7 @@ class MobiMLizer(object): bgcolor=istate.bgcolor) if istate.fgcolor != 'black': inline = etree.SubElement(inline, XHTML('font'), - color=istate.fgcolor) + color=unicode(istate.fgcolor)) if istate.strikethrough: inline = etree.SubElement(inline, XHTML('s')) bstate.inline = inline