Merge branch 'master' of https://github.com/CoderAllan/calibre

2025-07-08 18:54:09 -04:00 · 2016-10-12 11:06:15 +05:30 · 2016-10-12 11:06:15 +05:30 · cf747d617b
commit cf747d617b
parent 799ed0a4ba c851f1125c
6 changed files with 30 additions and 102 deletions
--- a/recipes/infoworld.recipe
+++ b/recipes/infoworld.recipe
@@ -18,17 +18,8 @@ class Engadget(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
+    auto_cleanup = True

-    remove_tags = [dict(name='div', attrs={'class': ["articleTools clearfix", "relatedContent", "pagination clearfix", "addResources"]}),
-                   dict(name='div', attrs={'id': ["post-socialPromoBlock"]})]
-
-    keep_only_tags = [dict(name='div', attrs={'class': ["article"]})]
-
-    feeds = [(u'Top Tech Stories', u'http://infoworld.com/homepage/feed'),
-             (u'Today\'s Tech Headlines', u'http://www.infoworld.com/news/feed')]
-
-    def get_article_url(self, article):
-
-        url = article.get('link', None)
-
-        return url
+    feeds = [(u'Top Tech Stories', u'http://www.infoworld.com/index.rss'),
+             (u'Today\'s Tech Headlines',
+              u'http://www.infoworld.com/news/index.rss')]
--- a/recipes/journalgazette.recipe
+++ b/recipes/journalgazette.recipe
@@ -17,37 +17,14 @@ class AdvancedUserRecipe1283666183(BasicNewsRecipe):
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
-    keep_only_tags = [dict(name='div', attrs={'id': 'mainContent'})]
-    extra_css = '#copyinfo { font-size: 6 ;} \n #photocredit { font-size: 6 ;} \n  .pubinfo { font-size: 6 ;}'
-    masthead_url = 'http://www.journalgazette.net/img/icons/jgmini.gif'
-#    cover_url = 'http://www.journalgazette.net/img/icons/jgmini.gif'
-    encoding = 'cp1252'
+    auto_cleanup = True

-    feeds = [(u'Opinion', u'http://journalgazette.net/apps/pbcs.dll/section?Category=EDIT&template=blogrss&mime=xml'),
-             (u'Local News', u'http://journalgazette.net/apps/pbcs.dll/section?Category=LOCAL&template=blogrss&mime=xml'),
-             (u'Sports', u'http://journalgazette.net/apps/pbcs.dll/section?Category=SPORTS&template=blogrss&mime=xml'),
-             (u'Features', u'http://journalgazette.net/apps/pbcs.dll/section?Category=FEAT&template=blogrss&mime=xml'),
-             (u'Business', u'http://journalgazette.net/apps/pbcs.dll/section?Category=BIZ&template=blogrss&mime=xml'),
-             (u'Ice Chips', u'http://journalgazette.net/apps/pbcs.dll/section?Category=BLOGS11&template=blogrss&mime=xml '),
-             (u'Entertainment', u'http://journalgazette.net/apps/pbcs.dll/section?Category=ENT&template=blogrss&mime=xml'),
-             (u'Food', u'http://journalgazette.net/apps/pbcs.dll/section?Category=FOOD&template=blogrss&mime=xml')
+    feeds = [(u'Opinion', u'http://www.journalgazette.net/opinion/rss/'),
+             (u'Local News', u'http://www.journalgazette.net/news/local/rss/'),
+             (u'Sports', u'http://www.journalgazette.net/sports/rss/'),
+             (u'Business', u'http://www.journalgazette.net/business/rss/'),
+             (u'Entertainment',
+              u'http://www.journalgazette.net/entertainment/rss/'),
+             (u'Food', u'http://www.journalgazette.net/food/'),
+             (u'Blogs', u'http://www.journalgazette.net/blog/rss/'),
             ]
-
-    def print_version(self, url):
-        split1 = url.split("/")
-        # print 'THE SPLIT IS: ', split1
-        url3 = split1[2]
-        url5 = split1[4]
-        url6 = split1[5]
-        url7 = split1[6]
-
-        print_url = 'http://' + url3 + '/apps/pbcs.dll/article?AID=/' + \
-            url5 + '/' + url6 + '/' + url7 + '/-1/EDIT01&template=printart'
-        # print 'THIS URL WILL PRINT: ', print_url # this is a test string to
-        # see what the url is it will return
-        return print_url
-
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
--- a/recipes/jp_dk.recipe
+++ b/recipes/jp_dk.recipe
@@ -1,4 +1,3 @@
-
 __license__ = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
@@ -18,31 +17,12 @@ class JP_dk(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
-    encoding = 'cp1252'
    language = 'da'
-
-    extra_css = ' body{font-family: Arial,Verdana,Helvetica,Geneva,sans-serif } h1{font-family: Times,Georgia,Verdana,serif } '
-
-    conversion_options = {
-        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
-    }
+    auto_cleanup = True

    feeds = [
-
        (u'Tophistorier', u'http://www.jp.dk/rss/topnyheder.jsp'),
-    (u'Seneste nyt', u'http://jp.dk/index.jsp?service=rssfeed&submode=seneste'),
+        (u'Seneste nyt',
+         u'http://jp.dk/index.jsp?service=rssfeed&submode=seneste'),
        (u'Indland', u'http://www.jp.dk/rss/indland.jsp'),
-    (u'Udland', u'http://www.jp.dk/rss/udland.jsp'),
-    (u'Ny viden', u'http://www.jp.dk/rss/nyviden.jsp'),
-    (u'Timeout', u'http://www.jp.dk/rss/timeout.jsp'),
-    (u'Kultur', u'http://www.jp.dk/rss/kultur.jsp'),
-    (u'Sport', u'http://www.jp.dk/rss/sport.jsp')
    ]
-
-    remove_tags = [
-        dict(name=['object', 'link']), dict(
-            name='p', attrs={'class': 'artByline'})
-    ]
-
-    def print_version(self, url):
-        return url + '?service=printversion'
--- a/recipes/jpost.recipe
+++ b/recipes/jpost.recipe
@@ -15,9 +15,10 @@ class JerusalemPost(BasicNewsRecipe):
    no_stylesheets = True

    feeds = [('Front Page', 'http://www.jpost.com/Rss/RssFeedsFrontPage.aspx'),
-             ('Israel News', 'http://www.jpost.com/Rss/RssFeedsIsraelNews.aspx'),
-             ('Middle East News', 'http://www.jpost.com/Rss/RssFeedsMiddleEastNews.aspx'),
+             ('Israel News',
+              'http://www.jpost.com/Rss/RssFeedsIsraelNews.aspx'),
+             ('Middle East News',
+              'http://www.jpost.com/Rss/RssFeedsMiddleEastNews.aspx'),
             ('International News',
-              'http://www.jpost.com/Rss/RssFeedsInternationalNews.aspx'),
-             ('Editorials', 'http://www.jpost.com/Rss/RssFeedsEditorialsNews.aspx'),
+              'http://www.jpost.com/Rss/RssFeedsPolitiqueetsocial.aspx'),
             ]
--- a/recipes/las_vegas_review.recipe
+++ b/recipes/las_vegas_review.recipe
@@ -18,7 +18,6 @@ class AdvancedUserRecipe1274742400(BasicNewsRecipe):
        (u'Top Stories', u'http://www.reviewjournal.com/rss.xml'),
        (u'News', u'http://www.reviewjournal.com/news/feed'),
        (u'Business', u'http://www.reviewjournal.com/business/feed'),
-        (u'Living', u'http://www.reviewjournal.com/living/feed'),
        (u'Opinion', u'http://www.reviewjournal.com/opinion/feed'),
        (u'Neon', u'http://www.reviewjournal.com/neon/feed'),
        (u'Sports', u'http://www.reviewjournal.com/sports/feed')]
--- a/recipes/mdj.recipe
+++ b/recipes/mdj.recipe
@@ -12,33 +12,13 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
-
-    masthead_url = 'http://assets.matchbin.com/sites/624/assets/logo.gif'
-
-    keep_only_tags = [
-        dict(name='div', attrs={'id': ['print_content_container']})
-
-    ]
+    auto_cleanup = True

    feeds = [
-        ('Local News', 'http://mdjonline.com/rss/rss/Local+News?content_type=article&tags=news&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Local+News'),  # noqa
-        ('Sports',   'http://mdjonline.com/rss/rss/Sports?content_type=article&tags=sports&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Sports'),
-        ('Obits',    'http://mdjonline.com/rss/rss/Obits?content_type=article&tags=obits&page_name=rss&tag_inclusion=or&offset=0&limit=20&instance=Obits'),
-        ('Editorial & oped',   'http://mdjonline.com/rss/rss/Editorial+and+OPED?content_type=article&tags=oped+editorial&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Editorial+and+OPED'),  # noqa
-        ('Lifestyle',   'http://mdjonline.com/rss/rss/Lifestyle?content_type=article&tags=lifestyle&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Lifestyle'),  # noqa
-        ('Blogs',   'http://mdjonline.com/rss/rss/Lifestyle?content_type=article&tags=lifestyle&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Lifestyle')  # noqa
+        ('Local News', 'http://www.mdjonline.com/search/?f=rss&t=article&c=news/local&l=50&s=start_time&sd=desc'),  # noqa
+        ('Sports', 'http://www.mdjonline.com/search/?f=rss&t=article&c=sports&l=50&s=start_time&sd=desc'),
+        ('Obits', 'http://www.mdjonline.com/search/?f=rss&t=article&c=obituaries&l=50&s=start_time&sd=desc'),
+        ('Editorial & oped', 'http://www.mdjonline.com/search/?f=rss&t=article&c=opinion/mdj_editorials&l=50&s=start_time&sd=desc'),  # noqa
+        ('Lifestyle', 'http://www.mdjonline.com/search/?f=rss&t=article&c=news/lifestyle&l=50&s=start_time&sd=desc'),  # noqa
+        ('Blogs', 'http://www.mdjonline.com/search/?f=rss&t=article&c=opinion/blogs&l=50&s=start_time&sd=desc')  # noqa
    ]
-
-    def print_version(self, url):
-        split1 = url.split("/")
-        artid = split1[4]
-
-        # example of link to convert
-        # Original link: http://mdjonline.com/bookmark/9274197
-        # print version: http://mdjonline.com/printer_friendly/9274197
-
-        print_url = 'http://mdjonline.com/printer_friendly/' + artid
-        return print_url
-
-        # test with ebook-convert nejm.recipe output_dir --test -vv >
-        # myrecipe.txt