from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe


class NYTimes(BasicNewsRecipe):
    """Calibre recipe that downloads saved articles from a framabag account.

    The class name is historical; the recipe itself targets wallabag as
    hosted at ``appURL`` and requires the user's login credentials.
    """

    # Root of the hosted wallabag service; per-user pages live under /u/<name>.
    appURL = 'https://www.framabag.org'

    title = 'wallabag'
    __author__ = 'Xavier Detant'
    description = 'Get your wallabag from your framabag account. wallabag is a self hosted read it later platform'
    needs_subscription = True

    # Keep only the element with id="article" from each downloaded page.
    remove_tags_before = dict(id='article')
    remove_tags_after = dict(id='article')

    def get_browser(self):
        """Return a browser, logged in when credentials are available.

        When both ``self.username`` and ``self.password`` are set, the user's
        page is opened and the form named ``loginform`` is filled in and
        submitted. Otherwise the plain browser is returned untouched.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.appURL + '/u/' + self.username)
            br.select_form(name='loginform')
            br['login'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Build the list of feeds from the user's wallabag index page.

        Every element with class ``entrie`` that contains a link becomes one
        article. Articles are grouped by the text of their ``reading-time``
        link; entries without such text go to ``'Uncategorized'``.

        Returns:
            A list of ``(feed_title, articles)`` tuples, where ``articles`` is
            a list of dicts with the keys ``title``, ``url``, ``date``,
            ``description`` and ``content``.
        """
        baseURL = self.appURL + '/u/' + self.username + '/'
        soup = self.index_to_soup(baseURL + 'index.php')

        # The same "today" stamp is used for every article, so compute it once.
        pubdate = strftime('%a, %d %b')

        articles = {}
        for div in soup.findAll(True, attrs={'class': ['entrie']}):
            a = div.find('a', href=True)
            if not a:
                continue

            key = self.tag_to_string(
                div.find('a', attrs={'class': ['reading-time']}))
            # NOTE(review): calibre documents tag_to_string as returning a
            # "possibly empty" string, so a missing reading-time link is
            # expected to yield '' rather than None - confirm. A truthiness
            # test covers both cases, so such entries are never filed under
            # an empty feed title.
            feed = key or 'Uncategorized'

            summary = div.find('p')
            if summary:
                description = self.tag_to_string(summary, use_alt=False)
            else:
                description = ''

            articles.setdefault(feed, []).append(dict(
                title=self.tag_to_string(a, use_alt=False),
                url=baseURL + a['href'],
                date=pubdate,
                description=description,
                content='',
            ))

        return list(articles.items())