diff --git a/recipes/barrons.recipe b/recipes/barrons.recipe index 58c62e20e9..06c8e500e4 100644 --- a/recipes/barrons.recipe +++ b/recipes/barrons.recipe @@ -1,129 +1,78 @@ -## -## web2lrf profile to download articles from Barrons.com -## can download subscriber-only content if username and -## password are supplied. -## -''' -''' - -import re - from calibre.web.feeds.news import BasicNewsRecipe class Barrons(BasicNewsRecipe): - title = 'Barron\'s' - max_articles_per_feed = 50 - needs_subscription = True - language = 'en' + title = 'Barron\'s' + max_articles_per_feed = 50 + needs_subscription = True + language = 'en' - __author__ = 'Kovid Goyal and Sujata Raman' - description = 'Weekly publication for investors from the publisher of the Wall Street Journal' - timefmt = ' [%a, %b %d, %Y]' - use_embedded_content = False - no_stylesheets = True - match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*'] - conversion_options = {'linearize_tables': True} - ##delay = 1 + __author__ = 'Kovid Goyal' + description = 'Weekly publication for investors from the publisher of the Wall Street Journal' + timefmt = ' [%a, %b %d, %Y]' + use_embedded_content = False + no_stylesheets = True + match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*'] + conversion_options = {'linearize_tables': True} + ##delay = 1 - ## Don't grab articles more than 7 days old - oldest_article = 7 - use_javascript_to_login = True - requires_version = (0, 9, 16) + # Don't grab articles more than 7 days old + oldest_article = 7 + use_javascript_to_login = True + requires_version = (0, 9, 16) - extra_css = ''' - .datestamp{font-family:Verdana,Geneva,Kalimati,sans-serif; font-size:x-small;} - h3{font-family:Georgia,"Times New Roman",Times,serif; } - h2{font-family:Georgia,"Times New Roman",Times,serif; } - h1{ font-family:Georgia,"Times New Roman",Times,serif; } - .byline{font-family:Verdana,Geneva,Kalimati,sans-serif; font-size:x-small;} - .subhead{font-family:Georgia,"Times New Roman",Times,serif; font-size: small;} - .articlePage{ font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif;} - .insettipUnit{font-size: x-small;} - ''' - remove_tags = [ - dict(name ='div', attrs={'class':['sTools sTools-t', 'tabContainer artTabbedNav','rssToolBox hidden','articleToolbox']}), - dict(name = 'a', attrs ={'class':'insetClose'}) - ] + keep_only_tags = [dict(attrs={'class':lambda x: x and (x.startswith('sector one column') or x.startswith('sector two column'))})] + remove_tags = [ + dict(name='div', attrs={'class':['sTools sTools-t', 'tabContainer artTabbedNav','rssToolBox hidden','articleToolbox']}), + dict(attrs={'class':['insetButton', 'insettipBox', 'insetClose']}), + dict(attrs={'data-module-name':['resp.module.trendingNow.BarronsDesktop', 'resp.module.share_tools.ShareTools']}), + dict(name='span', attrs={'data-country-code':True, 'data-ticker-code':True}), + ] - preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in - [ - ## Remove anything before the body of the article. - (r'