Fix #4314 (mobi generation error)

This commit is contained in:
Kovid Goyal 2010-01-05 20:02:57 -07:00
parent 8bfffc74e2
commit 3dd8767ecb
2 changed files with 94 additions and 94 deletions

View File

@ -1,93 +1,93 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
''' '''
''' '''
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class PeopleMag(BasicNewsRecipe):
    '''
    Recipe that combines the People and US Magazine headline feeds into
    a single periodical.
    '''

    title = 'People/US Magazine Mashup'
    __author__ = 'BrianG'
    language = 'en'
    description = 'Headlines from People and US Magazine'
    no_stylesheets = True
    use_embedded_content = False
    oldest_article = 2
    max_articles_per_feed = 50

    # Styling applied to the generated articles.
    extra_css = '''
        h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;}
        h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
        .body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
        .byline {font-size: small; color: #666666; font-style:italic; }
        .lastline {font-size: small; color: #666666; font-style:italic;}
        .contact {font-size: small; color: #666666;}
        .contact p {font-size: small; color: #666666;}
        .photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
        .photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
        .article_timestamp{font-size:x-small; color:#666666;}
        a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;}
        '''

    # Selectors for the article containers of both sites.
    # NOTE(review): presumably only these elements are kept by the
    # recipe framework -- confirm against the BasicNewsRecipe docs.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'panel_news_article_main'}),
        dict(name='div', attrs={'class': 'article_content'}),
        dict(name='div', attrs={'class': 'headline'}),
        dict(name='div', attrs={'class': 'post'}),
        dict(name='div', attrs={'class': 'packageheadlines'}),
        dict(name='div', attrs={'class': 'snap_preview'}),
        dict(name='div', attrs={'id': 'articlebody'}),
    ]

    # Selectors for sharing widgets, navigation, promos and category tags.
    remove_tags = [
        dict(name='div', attrs={'class': 'share_comments'}),
        dict(name='p', attrs={'class': 'twitter_facebook'}),
        dict(name='div', attrs={'class': 'share_comments_bottom'}),
        dict(name='h2', attrs={'id': 'related_content'}),
        dict(name='div', attrs={'class': 'next_article'}),
        dict(name='div', attrs={'class': 'prev_article'}),
        dict(name='ul', attrs={'id': 'sharebar'}),
        dict(name='div', attrs={'class': 'sharelinkcont'}),
        dict(name='div', attrs={'class': 'categories'}),
        dict(name='ul', attrs={'class': 'categories'}),
        dict(name='div', attrs={'id': 'promo'}),
        dict(name='div', attrs={'class': 'linksWrapper'}),
        dict(name='p', attrs={'class': 'tag tvnews'}),
        dict(name='p', attrs={'class': 'tag movienews'}),
        dict(name='p', attrs={'class': 'tag musicnews'}),
        dict(name='p', attrs={'class': 'tag couples'}),
        dict(name='p', attrs={'class': 'tag gooddeeds'}),
        dict(name='p', attrs={'class': 'tag weddings'}),
        dict(name='p', attrs={'class': 'tag health'}),
    ]

    feeds = [
        ('PEOPLE Headlines', 'http://feeds.people.com/people/headlines'),
        ('US Headlines', 'http://www.usmagazine.com/celebrity_news/rss'),
    ]

    def get_article_url(self, article):
        '''
        Return the URL to download for `article`.

        The page at ``article.link`` is fetched and searched for the text
        "View All". When it is present, a ``viewAll=y`` query parameter is
        added to the link. If anything goes wrong during the lookup the
        unmodified link is returned.
        '''
        ans = article.link
        try:
            self.log('Looking for full story link in', ans)
            soup = self.index_to_soup(ans)
            if soup.find(text="View All") is not None:
                # Use '&' when the link already carries a query string so
                # the resulting URL stays well formed.
                separator = '&' if '?' in ans else '?'
                ans = ans + separator + 'viewAll=y'
                self.log('Found full story link', ans)
        except Exception:
            # Best effort only: fall back to the plain feed link, but do
            # not swallow KeyboardInterrupt/SystemExit and do not hide
            # the failure completely.
            self.log('Failed to look for full story link in', ans)
        return ans

    def postprocess_html(self, soup, first):
        '''
        Remove every ``div`` with class ``container_ate_qandatitle`` and
        every ``br`` element from `soup`, then return `soup`.

        `first` is accepted for interface compatibility and is not used.
        '''
        for tag in soup.findAll(name='div',
                                attrs={'class': "container_ate_qandatitle"}):
            tag.extract()
        for tag in soup.findAll(name='br'):
            tag.extract()
        return soup

View File

@ -245,7 +245,7 @@ class MobiMLizer(object):
bgcolor=istate.bgcolor) bgcolor=istate.bgcolor)
if istate.fgcolor != 'black': if istate.fgcolor != 'black':
inline = etree.SubElement(inline, XHTML('font'), inline = etree.SubElement(inline, XHTML('font'),
color=istate.fgcolor) color=unicode(istate.fgcolor))
if istate.strikethrough: if istate.strikethrough:
inline = etree.SubElement(inline, XHTML('s')) inline = etree.SubElement(inline, XHTML('s'))
bstate.inline = inline bstate.inline = inline