Merge from trunk

Charles Haley 2012-06-17 09:17:22 +02:00
commit 6cff50f296
8 changed files with 109 additions and 42 deletions

View File

@@ -6,10 +6,12 @@ class AdvancedUserRecipe1271446252(BasicNewsRecipe):
    max_articles_per_feed = 100
    language = 'fr'
    __author__ = 'zorgluf'
    max_articles_per_feed = 25
    #encoding = 'cp1252'
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True
    feeds = [(u'CanardPC', u'http://www.canardpc.com/feed.php')]
    remove_tags_after = dict(id='auteur_news')
    remove_tags_before = dict(id='fil_ariane')
    no_stylesheets = True
    remove_tags = [dict(name='a', attrs={'class':'news_tags'}),
        dict(name='div', attrs={'id':'fil_ariane'})]
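
Note on the CanardPC change: setting auto_cleanup = True hands article extraction to calibre's heuristic cleanup, which is why the hand-tuned remove_tags_before/remove_tags_after rules can be retired. A minimal sketch of what the trimmed recipe amounts to, with values taken from the hunk above (the class name and title are illustrative, not from the diff):

    from calibre.web.feeds.news import BasicNewsRecipe

    class CanardPCSketch(BasicNewsRecipe):
        # Sketch only: rely on calibre's heuristic cleanup instead of
        # hand-maintained remove_tags_* rules.
        title = u'CanardPC'
        __author__ = 'zorgluf'
        language = 'fr'
        max_articles_per_feed = 25
        use_embedded_content = False
        no_stylesheets = True
        auto_cleanup = True
        feeds = [(u'CanardPC', u'http://www.canardpc.com/feed.php')]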

View File

@@ -0,0 +1,46 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class NatGeoMag(BasicNewsRecipe):
    title = 'National Geographic Mag'
    __author__ = 'Terminal Veracity'
    description = 'The National Geographic Magazine'
    publisher = 'National Geographic'
    oldest_article = 31
    max_articles_per_feed = 50
    category = 'geography, magazine'
    language = 'en_US'
    publication_type = 'magazine'
    cover_url = 'http://www.yourlogoresources.com/wp-content/uploads/2011/09/national-geographic-logo.jpg'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    recursions = 1
    remove_empty_feeds = True
    feeds = [('National Geographic Magazine', 'http://feeds.nationalgeographic.com/ng/NGM/NGM_Magazine')]
    remove_tags = [dict(name='div', attrs={'class':['nextpage_continue', 'subscribe']})]
    keep_only_tags = [dict(attrs={'class':'main_3narrow'})]
    extra_css = """
        h1 {font-size: large; font-weight: bold; margin: .5em 0; }
        h2 {font-size: large; font-weight: bold; margin: .5em 0; }
        h3 {font-size: medium; font-weight: bold; margin: 0 0; }
        .article_credits_author {font-size: small; font-style: italic; }
        .article_credits_photographer {font-size: small; font-style: italic; display: inline }
        """

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            for article in feed.articles[:]:
                if 'Flashback' in article.title:
                    feed.articles.remove(article)
                elif 'Desktop Wallpaper' in article.title:
                    feed.articles.remove(article)
                elif 'Visions of Earth' in article.title:
                    feed.articles.remove(article)
                elif 'Your Shot' in article.title:
                    feed.articles.remove(article)
                elif 'MyShot' in article.title:
                    feed.articles.remove(article)
                elif 'Field Test' in article.title:
                    feed.articles.remove(article)
        return feeds
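
A side note on the parse_feeds override above: it drops recurring filler columns by testing the article title against one keyword per elif branch. The same filter can be expressed with a single keyword tuple; a minimal sketch operating on the feed/article objects returned by BasicNewsRecipe.parse_feeds (the SKIP_TITLES name and helper are illustrative, not part of the recipe):

    SKIP_TITLES = ('Flashback', 'Desktop Wallpaper', 'Visions of Earth',
                   'Your Shot', 'MyShot', 'Field Test')

    def drop_filler_articles(feeds):
        # Remove any article whose title mentions a recurring filler column;
        # iterate over a copy of the list so removal is safe.
        for feed in feeds:
            for article in list(feed.articles):
                if any(k in article.title for k in SKIP_TITLES):
                    feed.articles.remove(article)
        return feeds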

View File

@@ -1,3 +1,4 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
@@ -5,16 +6,17 @@ odb.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
import uuid
from lxml import html

class OurDailyBread(BasicNewsRecipe):
    title = 'Our Daily Bread'
    __author__ = 'Darko Miletic and Sujata Raman'
    __author__ = 'Kovid Goyal'
    description = "Our Daily Bread is a daily devotional from RBC Ministries which helps readers spend time each day in God's Word."
    oldest_article = 15
    language = 'en'
    max_articles_per_feed = 100
    no_stylesheets = True
    auto_cleanup = True
    use_embedded_content = False
    category = 'ODB, Daily Devotional, Bible, Christian Devotional, Devotional, RBC Ministries, Our Daily Bread, Devotionals, Daily Devotionals, Christian Devotionals, Faith, Bible Study, Bible Studies, Scripture, RBC, religion'
    encoding = 'utf-8'
@@ -26,12 +28,14 @@ class OurDailyBread(BasicNewsRecipe):
        ,'linearize_tables' : True
    }
    #keep_only_tags = [dict(attrs={'class':'module-content'})]
    #remove_tags = [
    #dict(attrs={'id':'article-zoom'})
    #,dict(attrs={'class':'listen-now-box'})
    #]
    #remove_tags_after = dict(attrs={'class':'readable-area'})
    keep_only_tags = [dict(attrs={'class':'calibre-inserted-psalm'}),
                      {'id':'content'}]
    remove_tags = [
        dict(attrs={'class':['listen-box', 'entry-zoom',
            'entry-footer']}),
        {'id':'nav-single'},
        dict(attrs={'class':lambda x:x and ' sharing ' in x}),
    ]
    extra_css = '''
        .text{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
@@ -43,18 +47,33 @@ class OurDailyBread(BasicNewsRecipe):
    feeds = [(u'Our Daily Bread', u'http://odb.org/feed/')]
    def preprocess_raw_html(self, raw, url):
        # Convert links to referenced Psalms to the actual psalms
        root = html.fromstring(raw)
        for a in root.xpath(
            '//a[starts-with(@href, "http://www.biblegateway.com")]'):
            uid = type(u'')(uuid.uuid4())
            raw = self.index_to_soup(a.get('href'), raw=True)
            iroot = html.fromstring(raw)
            matches = iroot.xpath('//div[contains(@class, "result-text-style-normal")]')
            if matches:
                div = matches[0]
                div.getparent().remove(div)
                root.xpath('//body')[0].append(div)
                a.set('href', '#'+uid)
                del a.attrib['target']
                div.set('id', uid)
                div.set('class', 'calibre-inserted-psalm')
                hr = div.makeelement('hr')
                div.insert(0, hr)
                # print html.tostring(div)
        raw = html.tostring(root, encoding=unicode)
        return raw
    def preprocess_html(self, soup):
        return self.adeify_images(soup)
        d = soup.find(id='content')
        d.extract()
        soup.find('body').insert(0, d)
        return soup
    def get_cover_url(self):
        href = 'http://www.rbc.org/index.aspx'
        soup = self.index_to_soup(href)
        a = soup.find('a',attrs={'id':'ctl00_hlTodaysDevotionalImage'})
        if a :
            cover_url = a.img['src']
        return cover_url
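
The new preprocess_raw_html hook above inlines the referenced scripture: each link to biblegateway.com is fetched, the passage div is grafted into the article body, and the link is rewritten to point at a fresh #uuid anchor on that div, so the passage reads inline instead of as an external link. The core move in isolation, as a sketch using lxml only (the function name, arguments, and sample markup are illustrative):

    import uuid
    from lxml import html

    def inline_fragment(root, a, fragment_div):
        # Sketch: graft a fetched fragment into the document body and
        # retarget the original <a> at it via a generated anchor id.
        uid = type(u'')(uuid.uuid4())
        fragment_div.set('id', uid)
        root.xpath('//body')[0].append(fragment_div)
        a.set('href', '#' + uid)

    doc = html.fromstring('<body><a href="http://example.com/x">passage</a></body>')
    inline_fragment(doc, doc.xpath('//a')[0], html.fromstring('<div>fetched text</div>'))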

View File

@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
theeconomiccollapseblog.com
'''
@@ -11,7 +11,7 @@ class TheEconomicCollapse(BasicNewsRecipe):
    description = 'Are You Prepared For The Coming Economic Collapse And The Next Great Depression?'
    publisher = 'The Economic Collapse'
    category = 'news, politics, USA, economy'
    oldest_article = 2
    oldest_article = 7
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
@@ -20,7 +20,7 @@ class TheEconomicCollapse(BasicNewsRecipe):
    remove_empty_feeds = True
    extra_css = """
        body{font-family: Tahoma,Arial,sans-serif }
        img{margin-bottom: 0.4em}
        img{margin-bottom: 0.4em; display: block;}
        """

    conversion_options = {
@@ -35,12 +35,9 @@ class TheEconomicCollapse(BasicNewsRecipe):
        ,dict(name=['iframe','object','embed','meta','link','base'])
    ]
    remove_attributes=['lang','onclick','width','height']
    keep_only_tags=[dict(attrs={'class':['post-headline','post-bodycopy clearfix','']})]
    keep_only_tags=[
        dict(name='div', attrs={'class':'post-headline'}),
        dict(name='div', attrs={'class':lambda x: x and 'post-bodycopy' in x.split()})
    ]
    feeds = [(u'Posts', u'http://theeconomiccollapseblog.com/feed')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
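
The keep_only_tags rewrite above replaces an exact class-string match ('post-bodycopy clearfix') with a callable, so the rule keeps working if the blog adds or reorders classes on the content div. The callable receives the tag's raw class attribute and splits it into individual class names; a sketch of the same predicate written out as a named function (the function name is illustrative):

    def has_post_bodycopy(class_attr):
        # class_attr is the raw class string, e.g. 'post-bodycopy clearfix',
        # or None when the tag has no class attribute.
        return bool(class_attr) and 'post-bodycopy' in class_attr.split()

    keep_only_tags = [
        dict(name='div', attrs={'class': 'post-headline'}),
        dict(name='div', attrs={'class': has_post_bodycopy}),
    ]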

View File

@@ -55,6 +55,7 @@ class ANDROID(USBMS):
            0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
            0x70c6 : [0x226],
            0x4316 : [0x216],
            0x4317 : [0x216],
            0x42d6 : [0x216],
            0x42d7 : [0x216],
            0x42f7 : [0x216],
@@ -202,7 +203,7 @@ class ANDROID(USBMS):
            'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
            'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
            'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
            'GT-S5830L_CARD', 'UNIVERSE', 'XT875', 'PRO']
            'GT-S5830L_CARD', 'UNIVERSE', 'XT875', 'PRO', '.KOBO_VOX']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@@ -211,7 +212,7 @@ class ANDROID(USBMS):
            'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
            'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
            'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875',
            'UMS_COMPOSITE', 'PRO']
            'UMS_COMPOSITE', 'PRO', '.KOBO_VOX']

    OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@@ -20,6 +20,11 @@ class LRFOptions(object):
            except:
                return ''
        m = oeb.metadata
        for x in ('left', 'top', 'right', 'bottom'):
            attr = 'margin_'+x
            val = getattr(opts, attr)
            if val < 0:
                setattr(opts, attr, 0)
        self.title = None
        self.author = self.publisher = _('Unknown')
        self.title_sort = self.author_sort = ''
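
The new loop in LRFOptions above normalizes the four page-margin options before the rest of __init__ reads them: any negative margin_left/top/right/bottom value is reset to 0. An equivalent clamp written with max(), sketch only, assuming the same opts object with margin_* attributes:

    for x in ('left', 'top', 'right', 'bottom'):
        attr = 'margin_' + x
        # never let a negative page margin through to the LRF options
        setattr(opts, attr, max(getattr(opts, attr), 0))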

View File

@@ -28,6 +28,7 @@ class EXTHHeader(object): # {{{
        self.start_offset = None
        left = self.num_items
        self.kf8_header = None
        self.uuid = self.cdetype = None

        while left > 0:
            left -= 1

View File

@@ -102,12 +102,8 @@ class AuthController(object):
        @wraps(func)
        def authenticate(*args, **kwargs):
            cookie = cherrypy.request.cookie.get(self.cookie_name, None)
            ua = cherrypy.request.headers.get('User-Agent', '').strip()
            if ('iPad;' in ua or 'iPhone;' in ua or (
                not (allow_cookie_auth and self.is_valid(cookie)))):
                # Apparently the iPad cant handle this
                # see https://bugs.launchpad.net/bugs/1013976
            if not (allow_cookie_auth and self.is_valid(cookie)):
                digest_auth(self.realm, get_ha1_dict_plain(self.users_dict),
                    self.secret)
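
The server change above drops the iPad/iPhone User-Agent special case (the removed comment points at bug 1013976) that forced digest authentication even when a valid auth cookie was present; after the change, digest auth is demanded only when cookie auth is disallowed or the cookie does not validate. The resulting decision, pulled out as a standalone predicate for clarity (a sketch; the function and parameter names are illustrative, not part of the server code):

    def needs_digest_auth(allow_cookie_auth, cookie, is_valid):
        # Require HTTP digest auth unless cookie auth is both allowed
        # and backed by a cookie that validates.
        return not (allow_cookie_auth and is_valid(cookie))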