From ac0d67ee6fe11e05736babd9a45285ba9b84437b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 15 Mar 2020 17:49:32 +0530 Subject: [PATCH] Remove non-working recipes --- recipes/icons/rossijkaja_gazeta.png | Bin 620 -> 0 bytes recipes/icons/vedomosti.png | Bin 430 -> 0 bytes recipes/rossijkaja_gazeta.recipe | 72 ---------- recipes/vedomosti.recipe | 207 ---------------------------- 4 files changed, 279 deletions(-) delete mode 100644 recipes/icons/rossijkaja_gazeta.png delete mode 100644 recipes/icons/vedomosti.png delete mode 100644 recipes/rossijkaja_gazeta.recipe delete mode 100644 recipes/vedomosti.recipe diff --git a/recipes/icons/rossijkaja_gazeta.png b/recipes/icons/rossijkaja_gazeta.png deleted file mode 100644 index 3b883db5b4e1deb3ffb4f24a4771df7694a482c9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 620 zcmV-y0+aoTP)3voVtf?AHi-C$(%3@jLP_pT!M_;rXmHE2Vm0$Lxc&wE948Wv>plL}@T6#9 z)07T#A^4fYzm;ozLDi4tl`BQDEm*esBYKp#>nc9+GQ11QUF!W*?kuPd!&l>qV1fPkO@ZR%-7~eoD=;} zhq=7L_yqn)pEjOMI9cN}`6Mt2HUNGve*g#nf^mQ!GmL-w!g6O<`xkrw0000K>Jy%)zg zaqgn@o-j5LgznBRw4+zogMi~A9|q)DRG0|rAg6DFcAn4tcdf^Ea1)Dxp4 zO0dMQ0%d2t9-?^)nPe0wXIn~+q%AIp-g)76*i|>7Lo6ItZsmeX?uF0@=0r1vtC0|rZH)`c+2>XQ3YSR^wpW>eMijnFk46x<{907*qoM6N<$g8nGJR{#J2 diff --git a/recipes/rossijkaja_gazeta.recipe b/recipes/rossijkaja_gazeta.recipe deleted file mode 100644 index 9929eb7350..0000000000 --- a/recipes/rossijkaja_gazeta.recipe +++ /dev/null @@ -1,72 +0,0 @@ -# vim:fileencoding=utf-8 -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdjectiveSpecies(BasicNewsRecipe): - title = u'Российская Газета' - __author__ = 'bug_me_not' - cover_url = u'http://img.rg.ru/img/d/logo2012.png' - description = 'Российская Газета' - publisher = 'Правительство Российской Федерации' - category = 'news' - language = 'ru' - no_stylesheets = True - remove_javascript = True - oldest_article = 300 - max_articles_per_feed = 100 - - remove_tags_before = dict(name='h1') - remove_tags_after = dict(name='div', attrs={'class': 'ar-citate'}) - remove_tags = [dict(name='div', attrs={'class': 'insert_left'}), - dict(name='a', attrs={'href': '#comments'}), - dict(name='div', attrs={'class': 'clear'}), - dict(name='div', attrs={'class': 'ar-citate'}), - dict(name='div', attrs={'class': 'ar-social red'}), - dict(name='div', attrs={'class': 'clear clear-head'}), ] - - feeds = [ - (u'Все материалы', u'http://www.rg.ru/tema/rss.xml'), - (u'Еженедельный выпуск', - u'http://www.rg.ru/tema/izd-subbota/rss.xml'), - (u'Государство', - u'http://www.rg.ru/tema/gos/rss.xml'), - (u'Экономика', - u'http://www.rg.ru/tema/ekonomika/rss.xml'), - (u'Бизнес', - u'http://www.rg.ru/tema/izd-biznes/rss.xml'), - (u'В мире', u'http://www.rg.ru/tema/mir/rss.xml'), - (u'Происшествия', - u'http://www.rg.ru/tema/bezopasnost/rss.xml'), - (u'Общество', - u'http://www.rg.ru/tema/obshestvo/rss.xml'), - (u'Культура', - u'http://www.rg.ru/tema/kultura/rss.xml'), - (u'Спорт', u'http://www.rg.ru/tema/sport/rss.xml'), - (u'Документы', u'http://rg.ru/tema/doc-any/rss.xml'), - (u'РГ: Башкортостан', - u'http://www.rg.ru/org/filial/bashkortostan/rss.xml'), - (u'РГ: Волга-Кама', - u'http://www.rg.ru/org/filial/volga-kama/rss.xml'), - (u'РГ: Восточная Сибирь', - u'http://www.rg.ru/org/filial/enisey/rss.xml'), - (u'РГ: Дальний Восток', - u'http://www.rg.ru/org/filial/dvostok/rss.xml'), - (u'РГ: Кубань. Северный Кавказ', - u'http://www.rg.ru/org/filial/kuban/rss.xml'), - (u'РГ: Пермский край', - u'http://www.rg.ru/org/filial/permkray/rss.xml'), - (u'РГ: Приволжье', - u'http://www.rg.ru/org/filial/privolzhe/rss.xml'), - (u'РГ: Северо-Запад', - u'http://www.rg.ru/org/filial/szapad/rss.xml'), - (u'РГ: Сибирь', - u'http://www.rg.ru/org/filial/sibir/rss.xml'), - (u'РГ: Средняя Волга', - u'http://www.rg.ru/org/filial/svolga/rss.xml'), - (u'РГ: Урал и Западная Сибирь', - u'http://www.rg.ru/org/filial/ural/rss.xml'), - (u'РГ: Центральная Россия', - u'http://www.rg.ru/org/filial/roscentr/rss.xml'), - (u'РГ: Юг России', - u'http://www.rg.ru/org/filial/jugrossii/rss.xml'), - ] diff --git a/recipes/vedomosti.recipe b/recipes/vedomosti.recipe deleted file mode 100644 index 0270e221b1..0000000000 --- a/recipes/vedomosti.recipe +++ /dev/null @@ -1,207 +0,0 @@ -#!/usr/bin/env python2 - -u''' -Ведомости -''' - -from calibre.web.feeds.feedparser import parse -from calibre.ebooks.BeautifulSoup import Tag -from calibre.web.feeds.news import BasicNewsRecipe - - -def new_tag(soup, name, attrs=()): - impl = getattr(soup, 'new_tag', None) - if impl is not None: - return impl(name, attrs=dict(attrs)) - return Tag(soup, name, attrs=attrs or None) - - -class VedomostiRecipe(BasicNewsRecipe): - title = u'Ведомости' - __author__ = 'Nikolai Kotchetkov' - publisher = 'vedomosti.ru' - category = 'press, Russia' - description = u'Ежедневная деловая газета' - oldest_article = 3 - max_articles_per_feed = 100 - - masthead_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif' - cover_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif' - - # Add feed names if you want them to be sorted (feeds of this list appear - # first) - sortOrder = [u'_default', u'Первая полоса', u'Власть и деньги'] - - encoding = 'cp1251' - language = 'ru' - no_stylesheets = True - remove_javascript = True - recursions = 0 - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [dict(name='td', attrs={'class': ['second_content']})] - - remove_tags_after = [dict(name='div', attrs={'class': 'article_text'})] - - remove_tags = [ - dict(name='div', attrs={'class': ['sep', 'choice', 'articleRightTbl']})] - - feeds = [u'http://www.vedomosti.ru/newspaper/out/rss.xml'] - - # base URL for relative links - base_url = u'http://www.vedomosti.ru' - - extra_css = 'h1 {font-size: 1.5em; margin: 0em 0em 0em 0em; text-align: center;}'\ - 'h2 {font-size: 1.0em; margin: 0em 0em 0em 0em;}'\ - 'h3 {font-size: 0.8em; margin: 0em 0em 0em 0em;}'\ - '.article_date {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\ - '.article_authors {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\ - '.article_img {width:100%; text-align: center; padding: 3px 3px 3px 3px;}'\ - '.article_img_desc {width:100%; text-align: center; font-size: 0.5em; color: gray; font-family: monospace;}'\ - '.article_desc {font-size: 1em; font-style:italic;}' - - def parse_index(self): - try: - feedData = parse(self.feeds[0]) - if not feedData: - raise NotImplementedError - self.log("parse_index: Feed loaded successfully.") - try: - if feedData.feed.title: - self.title = feedData.feed.title - self.log("parse_index: Title updated to: ", self.title) - except Exception: - pass - try: - if feedData.feed.description: - self.description = feedData.feed.description - self.log("parse_index: Description updated to: ", - self.description) - except Exception: - pass - - def get_virtual_feed_articles(feed): - if feed in feeds: - return feeds[feed][1] - self.log("Adding new feed: ", feed) - articles = [] - feeds[feed] = (feed, articles) - return articles - - feeds = {} - - # Iterate feed items and distribute articles using tags - for item in feedData.entries: - link = item.get('link', '') - title = item.get('title', '') - if '' == link or '' == title: - continue - article = {'title': title, 'url': link, 'description': item.get( - 'description', ''), 'date': item.get('date', ''), 'content': ''} - if not item.get('tags'): # noqa - get_virtual_feed_articles('_default').append(article) - continue - for tag in item.tags: - addedToDefault = False - term = tag.get('term', '') - if '' == term: - if (not addedToDefault): - get_virtual_feed_articles( - '_default').append(article) - continue - get_virtual_feed_articles(term).append(article) - - # Get feed list - # Select sorted feeds first of all - result = [] - for feedName in self.sortOrder: - if (not feeds.get(feedName)): - continue - result.append(feeds[feedName]) - del feeds[feedName] - result = result + feeds.values() - - return result - - except Exception as err: - self.log(err) - raise NotImplementedError - - def preprocess_html(self, soup): - return self.adeify_images(soup) - - def postprocess_html(self, soup, first_fetch): - - # Find article - contents = soup.find('div', {'class': ['article_text']}) - if not contents: - self.log('postprocess_html: article div not found!') - return soup - contents.extract() - - # Find title - title = soup.find('h1') - if title: - contents.insert(0, title) - - # Find article image - newstop = soup.find('div', {'class': ['newstop']}) - if newstop: - img = newstop.find('img') - if img: - imgDiv = new_tag(soup, 'div') - imgDiv['class'] = 'article_img' - - if img.get('width'): - del(img['width']) - if img.get('height'): - del(img['height']) - - # find description - element = img.parent.nextSibling - - img.extract() - imgDiv.insert(0, img) - - while element: - if not isinstance(element, Tag): - continue - nextElement = element.nextSibling - if 'p' == element.name: - element.extract() - element['class'] = 'article_img_desc' - imgDiv.insert(len(imgDiv.contents), element) - element = nextElement - - contents.insert(1, imgDiv) - - # find article abstract - abstract = soup.find('p', {'class': ['subhead']}) - if abstract: - abstract['class'] = 'article_desc' - contents.insert(2, abstract) - - # Find article authors - authorsDiv = soup.find('div', {'class': ['autors']}) - if authorsDiv: - authorsP = authorsDiv.find('p') - if authorsP: - authorsP['class'] = 'article_authors' - contents.insert(len(contents.contents), authorsP) - - # Fix urls that use relative path - urls = contents.findAll('a', href=True) - if urls: - for url in urls: - if '/' == url['href'][0]: - url['href'] = self.base_url + url['href'] - - body = soup.find('td', {'class': ['second_content']}) - if body: - body.replaceWith(contents) - - self.log('Result: ', soup.prettify()) - return soup