# calibre/recipes/ourdailybread.recipe


__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
odb.org
'''

from calibre.web.feeds.news import BasicNewsRecipe
import uuid
from lxml import html


class OurDailyBread(BasicNewsRecipe):
    """Recipe for the "Our Daily Bread" devotional feed (odb.org).

    Besides the usual feed download, links to Bible passages hosted on
    biblegateway.com are replaced by in-document anchors: the passage text
    is fetched and appended to the article so it can be read offline.
    """

    title = 'Our Daily Bread'
    __author__ = 'Kovid Goyal'
    description = "Our Daily Bread is a daily devotional from RBC Ministries which helps readers spend time each day in God's Word."
    oldest_article = 15
    language = 'en'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    conversion_options = {
        'comments': description, 'language': language, 'linearize_tables': True
    }

    # Keep the article body plus every passage block that
    # preprocess_raw_html() appended (they are tagged with this class).
    keep_only_tags = [dict(attrs={'class': 'calibre-inserted-psalm'}),
                      {'id': 'content'}]
    remove_tags = [
        dict(attrs={'class': ['listen-box', 'entry-zoom',
                              'entry-footer']}),
        {'id': 'nav-single'},
        dict(attrs={'class': lambda x: x and ' sharing ' in x}),
    ]

    extra_css = '''
                .text{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
                .devotionalTitle{font-family:Arial,Helvetica,sans-serif; font-size:large; font-weight: bold;}
                .devotionalDate{font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
                .devotionalVerse{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
                a{color:#000000;font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
                '''

    feeds = [(u'Our Daily Bread', u'http://odb.org/feed/')]

    def preprocess_raw_html(self, raw, url):
        """Inline the Bible passages that the article links to.

        Every ``<a>`` whose ``href`` points at www.biblegateway.com (http or
        https) is fetched. When the fetched page contains a
        ``result-text-style-normal`` div, that div is moved to the end of the
        article's ``<body>``, given a fresh unique id and the
        ``calibre-inserted-psalm`` class, and the link is rewritten to point
        at it.

        :param raw: markup of the downloaded article.
        :param url: URL the article was downloaded from (unused).
        :return: the re-serialized article if at least one passage was
            inlined, otherwise ``raw`` unchanged.
        """
        root = html.fromstring(raw)
        bodies = root.xpath('//body')
        if not bodies:
            # Nowhere to append passages to; leave the markup untouched.
            return raw
        body = bodies[0]

        changed = False
        # xpath() returns a list, so mutating the tree in the loop is safe.
        for a in root.xpath(
                '//a[starts-with(@href, "http://www.biblegateway.com")'
                ' or starts-with(@href, "https://www.biblegateway.com")]'):
            uid = type(u'')(uuid.uuid4())
            # Use a dedicated name: the article markup in ``raw`` must not be
            # clobbered by the fetched page, or a link without a usable
            # passage would make this method return the wrong document.
            passage_raw = self.index_to_soup(a.get('href'), raw=True)
            iroot = html.fromstring(passage_raw)
            matches = iroot.xpath(
                '//div[contains(@class, "result-text-style-normal")]')
            if not matches:
                continue
            div = matches[0]
            parent = div.getparent()
            if parent is not None:
                parent.remove(div)
            body.append(div)
            a.set('href', '#' + uid)
            # The link is now internal; drop ``target`` if it has one.
            a.attrib.pop('target', None)
            div.set('id', uid)
            div.set('class', 'calibre-inserted-psalm')
            # Visual separator in front of each inlined passage.
            hr = div.makeelement('hr')
            div.insert(0, hr)
            changed = True

        if changed:
            # Serialize once, after all links have been processed.
            return html.tostring(root, encoding='unicode')
        return raw

    def preprocess_html(self, soup):
        """Move the ``#content`` element to the top of ``<body>``.

        :param soup: parsed article.
        :return: the same ``soup`` object, modified in place. It is returned
            untouched when either ``#content`` or ``<body>`` is missing.
        """
        content = soup.find(id='content')
        body = soup.find('body')
        if content is None or body is None:
            return soup
        content.extract()
        body.insert(0, content)
        return soup