__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic '
'''
odb.org
'''

from calibre.web.feeds.news import BasicNewsRecipe
import uuid
from lxml import html


class OurDailyBread(BasicNewsRecipe):
    """Recipe that downloads the 'Our Daily Bread' devotional feed.

    Besides the usual feed/cleanup configuration, the recipe inlines the
    Bible passages that each devotional links to on biblegateway.com, so the
    passage text is available inside the generated book.
    """

    title = 'Our Daily Bread'
    __author__ = 'Kovid Goyal'
    description = "Our Daily Bread is a daily devotional from RBC Ministries which helps readers spend time each day in God's Word."
    oldest_article = 15
    language = 'en'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    conversion_options = {
        'comments': description,
        'language': language,
        'linearize_tables': True
    }

    # 'calibre-inserted-psalm' is the class given to the passages that
    # preprocess_raw_html() appends, so they survive the tag filtering.
    keep_only_tags = [
        dict(attrs={'class': 'calibre-inserted-psalm'}),
        {'id': 'content'},
    ]

    remove_tags = [
        dict(attrs={'class': ['listen-box', 'entry-zoom', 'entry-footer']}),
        {'id': 'nav-single'},
        dict(attrs={'class': lambda x: x and ' sharing ' in x}),
    ]

    extra_css = '''
    .text{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
    .devotionalTitle{font-family:Arial,Helvetica,sans-serif; font-size:large; font-weight: bold;}
    .devotionalDate{font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
    .devotionalVerse{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
    a{color:#000000;font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
    '''

    feeds = [(u'Our Daily Bread', u'http://odb.org/feed/')]

    def preprocess_raw_html(self, raw, url):
        """Replace links to referenced passages with the passages themselves.

        Every anchor pointing at biblegateway.com is fetched; when the
        fetched page contains a ``result-text-style-normal`` div, that div is
        appended to the article body and the anchor is rewritten to point at
        it.

        :param raw: HTML source of the article.
        :param url: URL the article was fetched from (unused here).
        :return: The modified article serialized as a unicode string.
        """
        root = html.fromstring(raw)
        for a in root.xpath(
                '//a[starts-with(@href, "http://www.biblegateway.com")]'):
            # A fresh id per link, used as the in-document link target.
            uid = type(u'')(uuid.uuid4())
            # Kept in its own name so the ``raw`` argument is not clobbered.
            psalm_raw = self.index_to_soup(a.get('href'), raw=True)
            psalm_root = html.fromstring(psalm_raw)
            matches = psalm_root.xpath(
                '//div[contains(@class, "result-text-style-normal")]')
            if not matches:
                # Nothing usable on the linked page: leave the link as it is.
                continue
            div = matches[0]
            div.getparent().remove(div)
            root.xpath('//body')[0].append(div)
            a.set('href', '#' + uid)
            # The link is now internal; drop ``target`` if the anchor has
            # one (not every anchor does, so a plain ``del`` could raise).
            a.attrib.pop('target', None)
            div.set('id', uid)
            div.set('class', 'calibre-inserted-psalm')
            # Visual separator in front of the inserted passage.
            hr = div.makeelement('hr')
            div.insert(0, hr)
        return html.tostring(root, encoding='unicode')

    def preprocess_html(self, soup):
        """Move the ``#content`` element to the start of ``<body>``.

        :param soup: Parsed article.
        :return: The same soup object; it is returned unchanged when either
            ``#content`` or ``<body>`` is missing.
        """
        content = soup.find(id='content')
        body = soup.find('body')
        if content is None or body is None:
            return soup
        content.extract()
        body.insert(0, content)
        return soup