#!/usr/bin/env python # vim:fileencoding=utf-8 # Calibre recipe for Instapaper.com (Stable version) # # Homepage: http://khromov.wordpress.com/projects/instapaper-calibre-recipe/ # Source: https://github.com/kovidgoyal/calibre/blob/master/recipes/instapaper.recipe # Last updated 10 July 2024 from calibre.web.feeds.news import BasicNewsRecipe # The Gutenweb stylesheet from https://www.mobileread.com/forums/showpost.php?p=2809828&postcount=31 gutenweb = '''"html,body,div,span,applet,object,iframe,h1,h2,h3,h4,h5,h6,p,blockquote,pre,a,abbr,acronym,address,big,cite,code,del,dfn,em,img,ins,kbd,q,s,samp,small,strike,strong,sub,sup,tt,var,b,u,i,center,dl,dt,dd,ol,ul,li,fieldset,form,label,legend,table,caption,tbody,tfoot,thead,tr,th,td,article,aside,canvas,details,embed,figure,figcaption,footer,header,hgroup,menu,nav,output,ruby,section,summary,time,mark,audio,video{margin:0;padding:0;border:0;font-size:100%;font:inherit;vertical-align:baseline}article,aside,details,figcaption,figure,footer,header,hgroup,menu,nav,section{display:block}body{line-height:1}ol,ul{list-style:none}blockquote,q{quotes:none}blockquote:before,blockquote:after,q:before,q:after{content:\'\';content:none}table{border-collapse:collapse;border-spacing:0}html,:root{font-size:16px}body{font-size:1em;line-height:1.5em;margin-top:1.5em;margin-bottom:1.5em;max-width:33em;margin-left:auto;margin-right:auto;font-family:Helvetica,Arial,sans-serif;text-align:left;word-spacing:normal;hyphens:auto;orphans:2;widows:2;font-variant-numeric:oldstyle-nums}body *{max-width:100%}address,article,aside,audio,canvas,footer,header,ol,ul,dl,pre,section,table,video,img,figure{margin-top:1.5em;margin-bottom:1.5em}p{margin-top:1.5em;margin-bottom:0em}p+p{margin-top:0em;margin-bottom:0em;text-indent:1.5em}h1{font-size:2.25em;line-height:1.33333em;margin-top:0.66667em;margin-bottom:0.66667em}h2{font-size:1.5em;line-height:1em;margin-top:1em;margin-bottom:1em}h3{font-size:1.3125em;line-height:1.14286em;margin-top:1.14286em;margin-bottom:1.14286em}h4{font-size:1.125em;line-height:1.33333em;margin-top:1.33333em;margin-bottom:1.33333em}h1,h2,h3,h4,h5,h6{font-family:Georgia,serif;font-weight:bold;page-break-after:avoid}ul li{list-style-type:disc}ol li{list-style-type:decimal}li{list-style-position:inside;text-indent:1.5em}dt{font-weight:bold;float:left;margin-right:1.5em}tr{page-break-before:avoid;page-break-after:avoid}td,th{outline:0.1em solid #000;padding:0 0.5em;text-align:left}tfoot td{font-style:italic}caption{font-style:italic;text-align:center;font-style:italic}blockquote{margin-top:2.25em;margin-bottom:2.25em;margin-left:2.25em;margin-right:2.25em}blockquote p{margin-top:0em;margin-bottom:0em;text-indent:0}figure{text-align:center}figure img,figure audio,figure canvas,figure video,figure table{margin-top:0;margin-bottom:0}figcaption{font-size:0.875em;line-height:1.71429em;margin-top:0em;margin-bottom:1.71429em;font-style:italic}img{vertical-align:bottom}code,samp,kbd,var{font-family:Consolas,"Liberation Mono",Courier,monospace;font-size:0.875em;font-weight:normal;font-style:normal;text-decoration:none;line-height:0.875em;padding:0 0.3em}mark{background:#ff0;color:#000}code,.code,samp,kbd,var{background-color:#f8f8f8;box-shadow:0 0 0.1em 0.1em #ddd}em{font-style:italic}strong{font-weight:bold}abbr{letter-spacing:0.1em}abbr[title]{border-bottom:1px dotted #000}cite,q{font-style:italic}q{font-style:italic;quotes:"\xe2\x80\x9c" "\xe2\x80\x9d" "\xe2\x80\x98" "\xe2\x80\x99"}q:before{content:open-quote}q:after{content:close-quote}dfn{font-style:italic}sup,sub{font-size:70%;line-height:70%;position:relative}sup{top:-0.5em}sub{top:0.5em}hr{border-bottom:0.0625em solid #000;border-top:0 none;border-left:0 none;border-right:0 none;margin-top:1.4375em;margin-bottom:1.5em}small{font-size:0.875em;line-height:1.71429em;margin-top:1.71429em;margin-bottom:1.71429em}i{font-style:italic}b{font-weight:bold}u{text-decoration:underline}s{text-decoration:line-through}ins{font-weight:bold;text-decoration:underline}del{text-decoration:line-through}.caps,.nums{letter-spacing:0.1em}.caps{font-variant-numeric:lining-nums}.code{overflow:auto;padding:0 1em;background-color:#f8f8f8;box-shadow:0 0 0.1em 0.1em #ddd}.code code,.code samp,.code kbd,.code var{box-shadow:none;padding:0}.chapter{page-break-after:auto;page-break-before:always}.note{text-indent:0;font-size:0.875em;line-height:1.71429em;margin-top:1.71429em;margin-bottom:1.71429em}.verse{font-family:inherit;display:table;width:auto;margin-left:auto;margin-right:auto}.toc{margin:0 auto}.toc td,.toc th{outline:0 none}.toc th{padding:0 0.5em 0 0;text-align:right;font-weight:normal}.toc td:before{content:"\\2022";padding-right:0.5em}.toc td{padding:0;text-align:left;font-style:italic}@page{margin-top:72pt;margin-bottom:72pt}@media print{body{font-size:12pt;line-height:18pt;margin-top:0pt;margin-bottom:0pt;font-family:"Times New Roman",Times,serif}p{margin-top:18pt;margin-bottom:0pt}p+p{text-indent:18pt}address,article,aside,audio,canvas,footer,header,ol,ul,dl,pre,section,table,video,img,figure{margin-top:18pt;margin-bottom:18pt}h1{font-size:21pt;line-height:36pt;margin-top:18pt;margin-bottom:18pt}h2{font-size:18pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}h3{font-size:16pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}h4{font-size:14pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}dt{margin-right:18pt}li{text-indent:18pt}blockquote{margin-top:27pt;margin-bottom:27pt;margin-left:27pt;margin-right:27pt}blockquote p{margin-top:0em;margin-bottom:0em;text-indent:0}figcaption{font-size:10pt;line-height:18pt;margin-top:0pt;margin-bottom:18pt}pre{white-space:pre-line}abbr[title]{border-bottom:0 none}small{font-size:10pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}hr{border-bottom:0.08333em solid #000;margin-top:17pt;margin-bottom:18pt}.note{font-size:10pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}}''' # noqa: E501 class InstapaperRecipe(BasicNewsRecipe): title = 'Instapaper' __author__ = 'Darko Miletic, Stanislav Khromov, Jim Ramsay, Stephan Hügel' publisher = 'Instapaper.com' publication_type = 'magazine' description = 'A digest of your unread Instapaper articles' compress_news_images_max_size = 250 category = 'info, custom, Instapaper' oldest_article = 365 max_articles_per_feed = 100 resolve_internal_links = True no_stylesheets = False timefmt = ' [%a, %d %b, %Y]' extra_css = gutenweb masthead_url = 'https://d2kfnvwohu0503.cloudfront.net/img/logo@2x.png' encoding = 'utf-8' language = 'en' remove_javascript = True recipe_specific_options = { 'days': { 'short': 'Oldest article to download from this news source. In days ', 'long': 'For example, 0.5, gives you articles from the past 12 hours', 'default': str(oldest_article) } } def __init__(self, *args, **kwargs): BasicNewsRecipe.__init__(self, *args, **kwargs) d = self.recipe_specific_options.get('days') if d and isinstance(d, str): self.oldest_article = float(d) remove_tags = [ dict(name='div', attrs={'id': 'reflow'}), dict(name='div', attrs={'id': 'modal_backer'}), dict(name='div', attrs={'id': 'speedTimeRemaining'}), dict(name='div', attrs={'id': 'highlight_create_popover'}), dict(name='div', attrs={'id': 'highlight_delete_popover'}), dict(name='div', attrs={'id': 'summary_highlight_popover'}), dict(name='script'), dict(name='section', attrs={'class': 'primary_bar'}), dict(name='div', attrs={'class': 'modal_group'}), dict(name='div', attrs={'class': 'page_header_read'}), dict(name='div', attrs={'class': 'side_drawer_container'}), dict(name='div', attrs={'class': 'modal_name'}), dict(name='div', attrs={'class': 'highlight_popover'}), dict(name='div', attrs={'class': 'bar bottom'}), dict(name='div', attrs={'class': 'evernote_confirm'}), dict(name='div', attrs={'id': 'controlbar_container'}), dict(name='div', attrs={'id': 'footer'}), dict(name='div', attrs={'id': 'speedRead'}), dict(name='label'), ] use_embedded_content = False needs_subscription = True INDEX = 'https://www.instapaper.com' LOGIN = INDEX + '/user/login' feeds = [('Instapaper Unread', 'https://www.instapaper.com/u')] remove_empty_feeds = True def get_browser(self): br = BasicNewsRecipe.get_browser(self) if self.username is not None: br.open(self.LOGIN) br.select_form(nr=0) br['username'] = self.username if self.password is not None: br['password'] = self.password br.submit() return br def parse_index(self): totalfeeds = [] lfeeds = self.get_feeds() for feedobj in lfeeds: feedtitle, feedurl = feedobj self.report_progress(0, 'Fetching feed' + ' %s...' % (feedtitle if feedtitle else feedurl)) articles = [] soup = self.index_to_soup(feedurl) for item in soup.findAll('a', attrs={'class': 'article_title'}): articles.append( { 'url': 'https://www.instapaper.com' + item['href'], 'title': item['title'], } ) totalfeeds.append((feedtitle, articles)) return totalfeeds