Merge from trunk

Charles Haley 2012-06-17 09:17:22 +02:00
commit 6cff50f296
8 changed files with 109 additions and 42 deletions

View File

@@ -6,10 +6,12 @@ class AdvancedUserRecipe1271446252(BasicNewsRecipe):
    max_articles_per_feed = 100
    language = 'fr'
    __author__ = 'zorgluf'
    max_articles_per_feed = 25
    #encoding = 'cp1252'
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True
    feeds = [(u'CanardPC', u'http://www.canardpc.com/feed.php')]
    remove_tags_after = dict(id='auteur_news')
    remove_tags_before = dict(id='fil_ariane')
    no_stylesheets = True
    remove_tags = [dict(name='a', attrs={'class':'news_tags'}),
        dict(name='div', attrs={'id':'fil_ariane'})]
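
Note on the CanardPC change: setting auto_cleanup = True hands article extraction to calibre's heuristic cleanup, which is why the hand-tuned remove_tags_before/remove_tags_after rules can be retired. A minimal sketch of what the trimmed recipe amounts to, with values taken from the hunk above (the class name and title are illustrative, not from the diff):

    from calibre.web.feeds.news import BasicNewsRecipe

    class CanardPCSketch(BasicNewsRecipe):
        # Sketch only: rely on calibre's heuristic cleanup instead of
        # hand-maintained remove_tags_* rules.
        title = u'CanardPC'
        __author__ = 'zorgluf'
        language = 'fr'
        max_articles_per_feed = 25
        use_embedded_content = False
        no_stylesheets = True
        auto_cleanup = True
        feeds = [(u'CanardPC', u'http://www.canardpc.com/feed.php')]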

View File

@@ -0,0 +1,46 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class NatGeoMag(BasicNewsRecipe):
    title = 'National Geographic Mag'
    __author__ = 'Terminal Veracity'
    description = 'The National Geographic Magazine'
    publisher = 'National Geographic'
    oldest_article = 31
    max_articles_per_feed = 50
    category = 'geography, magazine'
    language = 'en_US'
    publication_type = 'magazine'
    cover_url = 'http://www.yourlogoresources.com/wp-content/uploads/2011/09/national-geographic-logo.jpg'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    recursions = 1
    remove_empty_feeds = True
    feeds = [('National Geographic Magazine', 'http://feeds.nationalgeographic.com/ng/NGM/NGM_Magazine')]
    remove_tags = [dict(name='div', attrs={'class':['nextpage_continue', 'subscribe']})]
    keep_only_tags = [dict(attrs={'class':'main_3narrow'})]
    extra_css = """
        h1 {font-size: large; font-weight: bold; margin: .5em 0; }
        h2 {font-size: large; font-weight: bold; margin: .5em 0; }
        h3 {font-size: medium; font-weight: bold; margin: 0 0; }
        .article_credits_author {font-size: small; font-style: italic; }
        .article_credits_photographer {font-size: small; font-style: italic; display: inline }
        """

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            for article in feed.articles[:]:
                if 'Flashback' in article.title:
                    feed.articles.remove(article)
                elif 'Desktop Wallpaper' in article.title:
                    feed.articles.remove(article)
                elif 'Visions of Earth' in article.title:
                    feed.articles.remove(article)
                elif 'Your Shot' in article.title:
                    feed.articles.remove(article)
                elif 'MyShot' in article.title:
                    feed.articles.remove(article)
                elif 'Field Test' in article.title:
                    feed.articles.remove(article)
        return feeds
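
A side note on the parse_feeds override above: it drops recurring filler columns by testing the article title against one keyword per elif branch. The same filter can be expressed with a single keyword tuple; a minimal sketch operating on the feed/article objects returned by BasicNewsRecipe.parse_feeds (the SKIP_TITLES name and helper are illustrative, not part of the recipe):

    SKIP_TITLES = ('Flashback', 'Desktop Wallpaper', 'Visions of Earth',
                   'Your Shot', 'MyShot', 'Field Test')

    def drop_filler_articles(feeds):
        # Remove any article whose title mentions a recurring filler column;
        # iterate over a copy of the list so removal is safe.
        for feed in feeds:
            for article in list(feed.articles):
                if any(k in article.title for k in SKIP_TITLES):
                    feed.articles.remove(article)
        return feeds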

View File

@@ -1,3 +1,4 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
@@ -5,16 +6,17 @@ odb.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
import uuid
from lxml import html

class OurDailyBread(BasicNewsRecipe):
    title = 'Our Daily Bread'
    __author__ = 'Darko Miletic and Sujata Raman'
    __author__ = 'Kovid Goyal'
    description = "Our Daily Bread is a daily devotional from RBC Ministries which helps readers spend time each day in God's Word."
    oldest_article = 15
    language = 'en'
    max_articles_per_feed = 100
    no_stylesheets = True
    auto_cleanup = True
    use_embedded_content = False
    category = 'ODB, Daily Devotional, Bible, Christian Devotional, Devotional, RBC Ministries, Our Daily Bread, Devotionals, Daily Devotionals, Christian Devotionals, Faith, Bible Study, Bible Studies, Scripture, RBC, religion'
    encoding = 'utf-8'
@@ -26,12 +28,14 @@ class OurDailyBread(BasicNewsRecipe):
        ,'linearize_tables' : True
    }
    #keep_only_tags = [dict(attrs={'class':'module-content'})]
    #remove_tags = [
    #dict(attrs={'id':'article-zoom'})
    #,dict(attrs={'class':'listen-now-box'})
    #]
    #remove_tags_after = dict(attrs={'class':'readable-area'})
    keep_only_tags = [dict(attrs={'class':'calibre-inserted-psalm'}),
                      {'id':'content'}]
    remove_tags = [
        dict(attrs={'class':['listen-box', 'entry-zoom',
            'entry-footer']}),
        {'id':'nav-single'},
        dict(attrs={'class':lambda x:x and ' sharing ' in x}),
    ]
    extra_css = '''
        .text{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
@@ -43,18 +47,33 @@ class OurDailyBread(BasicNewsRecipe):
    feeds = [(u'Our Daily Bread', u'http://odb.org/feed/')]
    def preprocess_raw_html(self, raw, url):
        # Convert links to referenced Psalms to the actual psalms
        root = html.fromstring(raw)
        for a in root.xpath(
            '//a[starts-with(@href, "http://www.biblegateway.com")]'):
            uid = type(u'')(uuid.uuid4())
            raw = self.index_to_soup(a.get('href'), raw=True)
            iroot = html.fromstring(raw)
            matches = iroot.xpath('//div[contains(@class, "result-text-style-normal")]')
            if matches:
                div = matches[0]
                div.getparent().remove(div)
                root.xpath('//body')[0].append(div)
                a.set('href', '#'+uid)
                del a.attrib['target']
                div.set('id', uid)
                div.set('class', 'calibre-inserted-psalm')
                hr = div.makeelement('hr')
                div.insert(0, hr)
                # print html.tostring(div)
        raw = html.tostring(root, encoding=unicode)
        return raw
    def preprocess_html(self, soup):
        return self.adeify_images(soup)
        d = soup.find(id='content')
        d.extract()
        soup.find('body').insert(0, d)
        return soup
    def get_cover_url(self):
        href = 'http://www.rbc.org/index.aspx'
        soup = self.index_to_soup(href)
        a = soup.find('a',attrs={'id':'ctl00_hlTodaysDevotionalImage'})
        if a :
            cover_url = a.img['src']
        return cover_url
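
The new preprocess_raw_html hook above inlines the referenced scripture: each link to biblegateway.com is fetched, the passage div is grafted into the article body, and the link is rewritten to point at a fresh #uuid anchor on that div, so the passage reads inline instead of as an external link. The core move in isolation, as a sketch using lxml only (the function name, arguments, and sample markup are illustrative):

    import uuid
    from lxml import html

    def inline_fragment(root, a, fragment_div):
        # Sketch: graft a fetched fragment into the document body and
        # retarget the original <a> at it via a generated anchor id.
        uid = type(u'')(uuid.uuid4())
        fragment_div.set('id', uid)
        root.xpath('//body')[0].append(fragment_div)
        a.set('href', '#' + uid)

    doc = html.fromstring('<body><a href="http://example.com/x">passage</a></body>')
    inline_fragment(doc, doc.xpath('//a')[0], html.fromstring('<div>fetched text</div>'))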

View File

@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
theeconomiccollapseblog.com
'''
@@ -11,7 +11,7 @@ class TheEconomicCollapse(BasicNewsRecipe):
    description = 'Are You Prepared For The Coming Economic Collapse And The Next Great Depression?'
    publisher = 'The Economic Collapse'
    category = 'news, politics, USA, economy'
    oldest_article = 2
    oldest_article = 7
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
@@ -20,7 +20,7 @@ class TheEconomicCollapse(BasicNewsRecipe):
    remove_empty_feeds = True
    extra_css = """
        body{font-family: Tahoma,Arial,sans-serif }
        img{margin-bottom: 0.4em}
        img{margin-bottom: 0.4em; display: block;}
        """

    conversion_options = {
@@ -35,12 +35,9 @@ class TheEconomicCollapse(BasicNewsRecipe):
        ,dict(name=['iframe','object','embed','meta','link','base'])
    ]
    remove_attributes=['lang','onclick','width','height']
    keep_only_tags=[dict(attrs={'class':['post-headline','post-bodycopy clearfix','']})]
    keep_only_tags=[
        dict(name='div', attrs={'class':'post-headline'}),
        dict(name='div', attrs={'class':lambda x: x and 'post-bodycopy' in x.split()})
    ]
    feeds = [(u'Posts', u'http://theeconomiccollapseblog.com/feed')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
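
The keep_only_tags rewrite above replaces an exact class-string match ('post-bodycopy clearfix') with a callable, so the rule keeps working if the blog adds or reorders classes on the content div. The callable receives the tag's raw class attribute and splits it into individual class names; a sketch of the same predicate written out as a named function (the function name is illustrative):

    def has_post_bodycopy(class_attr):
        # class_attr is the raw class string, e.g. 'post-bodycopy clearfix',
        # or None when the tag has no class attribute.
        return bool(class_attr) and 'post-bodycopy' in class_attr.split()

    keep_only_tags = [
        dict(name='div', attrs={'class': 'post-headline'}),
        dict(name='div', attrs={'class': has_post_bodycopy}),
    ]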

View File

@@ -55,6 +55,7 @@ class ANDROID(USBMS):
            0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
            0x70c6 : [0x226],
            0x4316 : [0x216],
            0x4317 : [0x216],
            0x42d6 : [0x216],
            0x42d7 : [0x216],
            0x42f7 : [0x216],
@@ -202,7 +203,7 @@ class ANDROID(USBMS):
            'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
            'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
            'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
            'GT-S5830L_CARD', 'UNIVERSE', 'XT875', 'PRO']
            'GT-S5830L_CARD', 'UNIVERSE', 'XT875', 'PRO', '.KOBO_VOX']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@@ -211,7 +212,7 @@ class ANDROID(USBMS):
            'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
            'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
            'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875',
            'UMS_COMPOSITE', 'PRO']
            'UMS_COMPOSITE', 'PRO', '.KOBO_VOX']

    OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@@ -20,6 +20,11 @@ class LRFOptions(object):
            except:
                return ''
        m = oeb.metadata
        for x in ('left', 'top', 'right', 'bottom'):
            attr = 'margin_'+x
            val = getattr(opts, attr)
            if val < 0:
                setattr(opts, attr, 0)
        self.title = None
        self.author = self.publisher = _('Unknown')
        self.title_sort = self.author_sort = ''
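
The new loop in LRFOptions above normalizes the four page-margin options before the rest of __init__ reads them: any negative margin_left/top/right/bottom value is reset to 0. An equivalent clamp written with max(), sketch only, assuming the same opts object with margin_* attributes:

    for x in ('left', 'top', 'right', 'bottom'):
        attr = 'margin_' + x
        # never let a negative page margin through to the LRF options
        setattr(opts, attr, max(getattr(opts, attr), 0))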

View File

@@ -28,6 +28,7 @@ class EXTHHeader(object): # {{{
        self.start_offset = None
        left = self.num_items
        self.kf8_header = None
        self.uuid = self.cdetype = None

        while left > 0:
            left -= 1

View File

@@ -102,12 +102,8 @@ class AuthController(object):
        @wraps(func)
        def authenticate(*args, **kwargs):
            cookie = cherrypy.request.cookie.get(self.cookie_name, None)
            ua = cherrypy.request.headers.get('User-Agent', '').strip()
            if ('iPad;' in ua or 'iPhone;' in ua or (
                not (allow_cookie_auth and self.is_valid(cookie)))):
                # Apparently the iPad cant handle this
                # see https://bugs.launchpad.net/bugs/1013976
            if not (allow_cookie_auth and self.is_valid(cookie)):
                digest_auth(self.realm, get_ha1_dict_plain(self.users_dict),
                    self.secret)
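
The server change above drops the iPad/iPhone User-Agent special case (the removed comment points at bug 1013976) that forced digest authentication even when a valid auth cookie was present; after the change, digest auth is demanded only when cookie auth is disallowed or the cookie does not validate. The resulting decision, pulled out as a standalone predicate for clarity (a sketch; the function and parameter names are illustrative, not part of the server code):

    def needs_digest_auth(allow_cookie_auth, cookie, is_valid):
        # Require HTTP digest auth unless cookie auth is both allowed
        # and backed by a cookie that validates.
        return not (allow_cookie_auth and is_valid(cookie))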