Sync with trunk.

2026-01-03 02:30:21 -05:00 · 2011-08-28 09:14:56 -04:00 · 2011-08-28 09:14:56 -04:00 · 2222ecca21
commit 2222ecca21
parent a76e9dab89 03ee341892
4 changed files with 93 additions and 3 deletions
--- a/recipes/hbr.recipe
+++ b/recipes/hbr.recipe
@ -11,9 +11,14 @@ class HBR(BasicNewsRecipe):
    timefmt                = ' [%B %Y]'
    language = 'en'
    no_stylesheets = True
+    recipe_disabled = ('hbr.org has started requiring the use of javascript'
+            ' to log into their website. This is unsupported in calibre, so'
+            ' this recipe has been disabled. If you would like to see'
+            ' HBR supported in calibre, contact hbr.org and ask them'
+            ' to provide a javascript free login method.')

-    LOGIN_URL = 'http://hbr.org/login?request_url=/'
-    LOGOUT_URL = 'http://hbr.org/logout?request_url=/'
+    LOGIN_URL = 'https://hbr.org/login?request_url=/'
+    LOGOUT_URL = 'https://hbr.org/logout?request_url=/'

    INDEX = 'http://hbr.org/archive-toc/BR'

@ -44,7 +49,8 @@ class HBR(BasicNewsRecipe):
        br['signin-form:username'] = self.username
        br['signin-form:password'] = self.password
        raw = br.submit().read()
-        if 'My Account' not in raw:
+        # The response counts as a successful login only if it contains the "Sign out" link markup.
+        if '>Sign out<' not in raw:
            raise Exception('Failed to login, are you sure your username and password are correct?')
        try:
            link = br.find_link(text='Sign out')
--- a/recipes/hbr_blogs.recipe
+++ b/recipes/hbr_blogs.recipe
@ -9,6 +9,11 @@ class HBR(BasicNewsRecipe):
    __author__ = 'Kovid Goyal'
    language = 'en'
    no_stylesheets = True
+    recipe_disabled = ('hbr.org has started requiring the use of javascript'
+            ' to log into their website. This is unsupported in calibre, so'
+            ' this recipe has been disabled. If you would like to see'
+            ' HBR supported in calibre, contact hbr.org and ask them'
+            ' to provide a javascript free login method.')

    LOGIN_URL = 'http://hbr.org/login?request_url=/'
    LOGOUT_URL = 'http://hbr.org/logout?request_url=/'
--- a/recipes/ntv_spor.recipe
+++ b/recipes/ntv_spor.recipe
@ -0,0 +1,34 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1313512459(BasicNewsRecipe):
+    '''Recipe for the NTVSpor.net RSS feed (Turkish-language sports news).'''
+    title = u'NTVSpor'
+    __author__ = 'A Erdogan'
+    description = 'News from Turkey'
+    publisher = 'NTVSpor.net'
+    category = 'sports, Turkey'
+    language = 'tr'
+    masthead_url = 'http://www.ntvspor.net/HTML/r/i/l.png'
+    feeds = [(u'NTVSpor', u'http://www.ntvspor.net/Rss/anasayfa')]
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    conversion_options = dict(comment=description, tags=category, publisher=publisher, language=language)
+    extra_css = '''
+                        body{font-family:Arial,Helvetica,sans-serif; font-size:small; align:left;  color:#000000}
+                        h1{font-size:large; color:#000000}
+ 	    h2{font-size:small; color:#000000}
+	    p{font-size:small; color:#000000}
+                     '''
+    # Trim markers: the styled headline <h1> on one side, the newsBody div on the other.
+    remove_tags_before = dict(name='h1', attrs={'style':['margin-top: 6px;']})
+    remove_tags_after = dict(name='div', attrs={'id':'newsBody'})
+    # NOTE(review): 'il' is not an HTML element name; possibly 'li' was meant. Confirm.
+    remove_tags = [
+        dict(name=['embed','il','ul','iframe','object','link','base']),
+        dict(name='div', attrs={'id':'contentPhotoGallery'}), dict(name='div', attrs={'class':'SocialMediaWrapper'}),
+        dict(name='div', attrs={'class':'grid2'}), dict(name='div', attrs={'class':'grid8'}),
+        dict(name='div', attrs={'id':'anonsBar'}), dict(name='div', attrs={'id':'header'}),
+    ]
+
--- a/recipes/ntv_tr.recipe
+++ b/recipes/ntv_tr.recipe
@ -0,0 +1,45 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NTVMSNBC(BasicNewsRecipe):
+    '''Recipe for the NTV (ntvmsnbc.com) Turkish-language news feed; articles are fetched via their print URL.'''
+
+    title = u'NTV'
+    __author__ = 'A Erdogan'
+    description = 'News from Turkey'
+    publisher = 'NTV'
+    category = 'news, politics, Turkey'
+    language = 'tr'
+    masthead_url = 'http://www.ntvmsnbc.com/images/MSNBC/msnbc_ban.gif'
+    feeds = [(u'NTV', u'http://www.ntvmsnbc.com/id/3032091/device/rss/rss.xml')]
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    conversion_options = dict(comment=description, tags=category, publisher=publisher, language=language)
+    extra_css = '''
+                        body{font-family:Arial,Helvetica,sans-serif; font-size:small; align:left;  color:#000000}
+                        h1{font-size:large; color:#000000}
+ 	    h2{font-size:small; color:#000000}
+	    p{font-size:small; color:#000000}
+                     '''
+
+    # Trim markers: the first <h1> on one side, the styled heading div on the other.
+    remove_tags_before = dict(name='h1')
+    remove_tags_after = dict(name='div', attrs={'style':['font-family:Arial; font-size:16px;font-weight:bold; font-color:#003366; margin-bottom:20px; margin-top:20px; border-bottom:solid 1px;border-color: #CCC; padding-bottom:2px;']})
+    # NOTE(review): 'il' is not an HTML element name; possibly 'li' was meant. Confirm.
+    remove_tags = [
+        dict(name=['embed','il','ul','iframe','object','link','base']),
+        dict(name='div', attrs={'style':['padding: 0pt 10px 10px;']}),
+        dict(name='div', attrs={'class':['textSmallGrey w320']}),
+        dict(name='div', attrs={'style':['font-family:Arial; font-size:16px;font-weight:bold; font-color:#003366; margin-bottom:20px; margin-top:20px; border-bottom:solid 1px;border-color: #CCC; padding-bottom:2px;']}),
+    ]
+
+    def print_version(self, url):
+        '''Build the print-view URL from the article id that follows '/id/' in url.'''
+        articleid = url.rpartition('/id/')[2]
+        return 'http://www.ntvmsnbc.com/id/' + articleid + '/print/1/displaymode/1098/'
+
+    def preprocess_html(self, soup):
+        '''Return soup after passing it through self.adeify_images.'''
+        return self.adeify_images(soup)
+