This commit is contained in:
Kovid Goyal 2020-03-30 08:08:00 +05:30
commit 47fbf3b885
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
20 changed files with 0 additions and 4097 deletions

View File

@ -1,21 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BBCArabic(BasicNewsRecipe):
    """Fetch the BBC Arabic Middle East feed, rendered right-to-left."""

    title = u'BBC Arabic Middle East'
    __author__ = 'logophile777'
    language = 'ar'
    oldest_article = 7
    max_articles_per_feed = 100
    # Let calibre's heuristic cleanup isolate the article body.
    auto_cleanup = True
    # Arabic is written right-to-left; force RTL layout on the whole page.
    extra_css = 'body { text-align: right; direction:rtl; } '

    # Strip the "emp-*" embedded-media placeholder containers.
    remove_tags = [
        {'class': ['emp-alt-handheld', 'emp-noflash',
                   'emp-flashlink', 'emp-alt-screen']}
    ]

    feeds = [
        (u'BBC Arabic Middle East',
         u'http://www.bbc.co.uk/arabic/middleeast/index.xml'),
    ]

    def print_version(self, url):
        """Return the printer-friendly rendition of an article URL."""
        return '{}?print=1'.format(url)

View File

@ -1,46 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    """BBC news in simplified Chinese, built from the zhongwen RSS feeds."""

    title = u'BBC Chinese'
    __author__ = 'rty'
    __version__ = '1.0'
    language = 'zh'
    # BUG FIX: this attribute was misspelled 'pubisher', so calibre never
    # picked up the publisher metadata.
    publisher = 'British Broadcasting Corporation'
    description = 'BBC news in Chinese'
    category = 'News, Chinese'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'UTF-8'
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://wscdn.bbc.co.uk/zhongwen/simp/images/1024/brand.jpg'
    # Device-local Droid fallback font so CJK glyphs render on e-ink readers.
    extra_css = '''
    @font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
    body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
    h1 {font-family: 'DroidFont', serif;}\n
    .articledescription {font-family: 'DroidFont', serif;}
    '''

    feeds = [
        (u'\u4e3b\u9875', u'http://www.bbc.co.uk/zhongwen/simp/index.xml'),
        (u'\u56fd\u9645\u65b0\u95fb',
         u'http://www.bbc.co.uk/zhongwen/simp/world/index.xml'),
        (u'\u4e24\u5cb8\u4e09\u5730',
         u'http://www.bbc.co.uk/zhongwen/simp/china/index.xml'),
        (u'\u91d1\u878d\u8d22\u7ecf',
         u'http://www.bbc.co.uk/zhongwen/simp/business/index.xml'),
        (u'\u7f51\u4e0a\u4e92\u52a8',
         u'http://www.bbc.co.uk/zhongwen/simp/interactive/index.xml'),
        (u'\u97f3\u89c6\u56fe\u7247',
         u'http://www.bbc.co.uk/zhongwen/simp/multimedia/index.xml'),
        (u'\u5206\u6790\u8bc4\u8bba',
         u'http://www.bbc.co.uk/zhongwen/simp/indepth/index.xml')
    ]

    # Headline, topic/summary paragraphs and the article body/date only.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='p', attrs={'class': ['primary-topic', 'summary']}),
        dict(name='div', attrs={'class': ['bodytext', 'datestamp']}),
    ]

View File

@ -1,44 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Alayn Gortazar <zutoin at gmail dot com>'
'''
www.berria.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Berria(BasicNewsRecipe):
    """Basque-language daily 'Berria', assembled from its section RSS feeds."""

    title = 'Berria'
    __author__ = 'Alayn Gortazar'
    description = 'Euskal Herriko euskarazko egunkaria'
    publisher = 'Berria'
    category = 'news, politics, sports, Basque Country'
    language = 'eu'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Berria_Logo.svg/400px-Berria_Logo.svg.png'

    # Article header ('goiburua') plus the body containers.
    keep_only_tags = [
        {'id': 'goiburua'},
        {'name': 'div', 'attrs': {'class': ['ber_ikus']}},
        {'name': 'section', 'attrs': {'class': 'ber_ikus'}},
    ]
    # Comment links ('iruzkinak') and sponsor boxes ('laguntzaileak').
    remove_tags = [
        {'name': 'a', 'attrs': {'class': 'iruzkinak'}},
        {'name': 'div', 'attrs': {'class': 'laguntzaileak'}},
    ]
    extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .sarrera{color:#666} .titularra{font-size: x-large} .sarrera{font-weight: bold} .argazoin{color:#666; font-size: small}'  # noqa

    feeds = [
        (u'Edizioa jarraia', u'http://berria.info/rss/ediziojarraia.xml'),
        (u'Iritzia', u'http://berria.info/rss/iritzia.xml'),
        (u'Euskal Herria', u'http://berria.info/rss/euskalherria.xml'),
        (u'Ekonomia', u'http://berria.info/rss/ekonomia.xml'),
        (u'Mundua', u'http://berria.info/rss/mundua.xml'),
        (u'Kirola', u'http://berria.info/rss/kirola.xml'),
        (u'Plaza', u'http://berria.info/rss/plaza.xml'),
    ]

View File

@ -1,20 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BlogdaCidadania(BasicNewsRecipe):
    """Recent posts from the Brazilian politics blog 'Blog da Cidadania'."""

    title = 'Blog da Cidadania'
    description = 'Posts do Blog da Cidadania'
    __author__ = 'Diniz Bortolotto'
    publisher = 'Eduardo Guimaraes'
    category = 'politics, Brazil'
    language = 'pt_BR'
    publication_type = 'politics portal'
    encoding = 'utf8'
    oldest_article = 7
    max_articles_per_feed = 50
    # Present posts oldest-first instead of the feed's newest-first order.
    reverse_article_order = True

    feeds = [
        (u'Blog da Cidadania', u'http://www.blogcidadania.com.br/feed/'),
    ]

View File

@ -1,90 +0,0 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
__license__ = 'GPL v3'
class Caijing(BasicNewsRecipe):
    '''based on the recipe wrote by Eric Chen at 2011'''

    __author__ = '2014, Chen Wei <weichen302@gmx.com>'
    title = 'Caijing Magazine'
    description = '''
Founded in 1998, the fortnightly CAIJING Magazine has firmly established
itself as a news authority and leading voice for business and financial
issues in China.
CAIJING Magazine closely tracks the most important aspects of China's
economic reforms, developments and policy changes, as well as major events
in the capital markets. It also offers a broad international perspective
through first-hand reporting on international political and economic
issues.
CAIJING Magazine is China's most widely read business and finance magazine,
with a circulation of 225,000 per issue. It boasts top-level readers from
government, business and academic circles.'''
    language = 'zh'
    encoding = 'UTF-8'
    publisher = 'Caijing Magazine'
    publication_type = 'magazine'
    category = 'news, Business, China'
    timefmt = ' [%a, %d %b, %Y]'
    # Full articles require a subscriber login (see get_browser below).
    needs_subscription = True

    # Site chrome: navigation, logos, share widgets, sidebars, footers.
    remove_tags = [dict(attrs={'class': ['head_nav', 'mcont_logo', 'header',
                   'bottom', 'footer', 'magazine_ipad', 'cjartShare', 'ar_about',
                   'main_rt', 'mcont_nav', 'new']}),
                   dict(attrs={'id': ['articlePl']}),
                   dict(name=['script', 'noscript', 'style'])]
    no_stylesheets = True
    remove_javascript = True
    # Filled in as a side effect of parse_index(); read by get_cover_url().
    current_issue_url = ""
    current_issue_cover = ""

    def get_browser(self):
        """Return a browser, logged in when credentials are configured."""
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://service.caijing.com.cn/usermanage/login')
            br.select_form(name='mainLoginForm')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Locate the latest issue and build one feed per magazine section."""
        # The magazine landing page redirects via an inline script; extract
        # the quoted target URL from the first <script> block.
        soup_start = self.index_to_soup('http://magazine.caijing.com.cn/')
        jumpurl = soup_start.find('script').contents[0].split()
        for line in jumpurl:
            if 'http' in line.lower():
                issuesurl = line.split('"')[1]
                break
        # NOTE(review): issuesurl is unbound if no script token contains
        # 'http' — assumes the redirect page always provides one; confirm.
        soup_issues = self.index_to_soup(issuesurl)
        # find the latest issue
        div = soup_issues.find('div', attrs={'class': 'fmcon'})
        # Local variable intentionally shadows the class attribute of the
        # same name; only the cover URL is stored back on self.
        current_issue_url = div.find('a', href=True)['href']
        soup = self.index_to_soup(current_issue_url)
        coverimg = soup.find('div', {'class': 'zzfm_img'})
        self.current_issue_cover = coverimg.find('img')['src']
        feeds = []
        # Section containers/headings come in two alternating class-name
        # families (fmwz_* / zzlm_*), hence the combined regexes.
        for section in soup.findAll('div',
                attrs={'class': re.compile(r'(fmwz_ml|zzlm_nr)2?$')}):
            section_title = self.tag_to_string(section.find('div',
                attrs={'class': re.compile(r'(lmnav_bt|zzlm_bt)1?$')}))
            self.log('Found section:', section_title)
            articles = []
            for post in section.findAll('div',
                    attrs={'class': re.compile(r'(fmwz_bt|zzlm_nr_bt)')}):
                title = self.tag_to_string(post)
                url = post.find('a')['href']
                articles.append({'title': title, 'url': url, 'date': None})
            if articles:
                feeds.append((section_title, articles))
        return feeds

    def get_cover_url(self):
        """Return the cover image discovered during parse_index()."""
        return self.current_issue_cover

View File

@ -1,16 +0,0 @@
__copyright__ = '2011, Pablo Aldama <pabloaldama at gmail.com>'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311839910(BasicNewsRecipe):
    """Brazilian magazine 'Caros Amigos', pulled from its RSS feed."""

    title = u'Caros Amigos'
    __author__ = 'Pablo Aldama'
    language = 'pt_BR'
    oldest_article = 20
    max_articles_per_feed = 100

    feeds = [
        (u'Caros Amigos',
         u'http://carosamigos.terra.com.br/index2/index.php?format=feed&type=rss'),
    ]

    # Keep only the blog post containers; drop the share widget.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['blog']}},
        {'name': 'div', 'attrs': {'class': ['blogcontent']}},
    ]
    remove_tags = [{'name': 'div', 'attrs': {'class': 'addtoany'}}]

View File

@ -1,29 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1380852962(BasicNewsRecipe):
    """Brazilian weekly 'Carta Capital', one feed per section."""

    title = u'Carta Capital'
    __author__ = 'Erico Lisboa'
    language = 'pt_BR'
    oldest_article = 15
    max_articles_per_feed = 100
    # Rely on calibre's heuristic cleanup to extract the article body.
    auto_cleanup = True
    use_embedded_content = False

    feeds = [
        (u'Pol\xedtica',
         u'http://www.cartacapital.com.br/politica/politica/rss'),
        (u'Economia',
         u'http://www.cartacapital.com.br/economia/economia/atom.xml'),
        (u'Sociedade',
         u'http://www.cartacapital.com.br/sociedade/sociedade/atom.xml'),
        (u'Internacional',
         u'http://www.cartacapital.com.br/internacional/internacional/atom.xml'),
        (u'Tecnologia',
         u'http://www.cartacapital.com.br/tecnologia/tecnologia/atom.xml'),
        (u'Cultura',
         u'http://www.cartacapital.com.br/cultura/cultura/atom.xml'),
        (u'Sa\xfade',
         u'http://www.cartacapital.com.br/saude/saude/atom.xml'),
        (u'Educa\xe7\xe3o',
         u'http://www.cartacapital.com.br/educacao/educacao/atom.xml'),
    ]

View File

@ -1,69 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277228948(BasicNewsRecipe):
    """China Press USA: overseas Chinese newspaper, three RSS channels."""

    title = u'China Press USA'
    __author__ = 'rty'
    __version__ = '1.0'
    language = 'zh'
    # BUG FIX: the attribute was misspelled 'pubisher', so calibre never
    # saw the publisher metadata.
    publisher = 'www.chinapressusa.com'
    description = 'Overseas Chinese Network Newspaper in the USA'
    category = 'News in Chinese, USA'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'UTF-8'
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://www.chinapressusa.com/common/images/logo.gif'
    # Device-local Droid fallback font so CJK glyphs render on e-ink readers.
    extra_css = '''
    @font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
    body {
    margin-right: 8pt;
    font-family: 'DroidFont', serif;}
    h1 {font-family: 'DroidFont', serif, sans-serif}
    .show {font-family: 'DroidFont', serif, sans-serif}
    '''

    feeds = [
        (u'\u65b0\u95fb\u9891\u9053', u'http://news.uschinapress.com/news.xml'),
        (u'\u534e\u4eba\u9891\u9053', u'http://chinese.uschinapress.com/chinese.xml'),
        (u'\u8bc4\u8bba\u9891\u9053', u'http://review.uschinapress.com/review.xml'),
    ]
    # The article body lives in div.show; the timestamp box is noise.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'show'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'time'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': 'bank17'}),
    ]

    def append_page(self, soup, appendtag, position):
        """Recursively fetch the remaining pages of a multi-page article and
        splice their body text into *appendtag* at *position*."""
        pager = soup.find('div', attrs={'id': 'displaypagenum'})
        if pager:
            # NOTE(review): self.INDEX is never defined on this class, so a
            # paginated article would raise AttributeError here — confirm
            # whether the site still paginates and supply the base URL if so.
            nexturl = self.INDEX + pager.a['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class': 'show'})
            for it in texttag.findAll(style=True):
                del it['style']
            newpos = len(texttag.contents)
            self.append_page(soup2, texttag, newpos)
            texttag.extract()
            appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Stamp language/charset metadata, strip inline styles and merge
        any continuation pages into the article body."""
        mtag = '<meta http-equiv="Content-Language" content="zh-CN"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0, mtag)
        for item in soup.findAll(style=True):
            del item['style']
        self.append_page(soup, soup.body, 3)
        pager = soup.find('div', attrs={'id': 'displaypagenum'})
        if pager:
            pager.extract()
        return soup

View File

@ -1,73 +0,0 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2010, Derek Liang <Derek.liang.ca @@@at@@@ gmail.com>'
'''
cnd.org
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TheCND(BasicNewsRecipe):
    """Daily articles from CND (cnd.org), grouped under the newest date."""

    title = 'CND'
    __author__ = 'Derek Liang'
    description = ''
    INDEX = 'http://cnd.org'
    language = 'zh'
    conversion_options = {'linearize_tables': True}

    remove_tags_before = dict(name='div', id='articleHead')
    remove_tags_after = dict(id='copyright')
    remove_tags = [dict(name='table', attrs={'align': 'right'}), dict(name='img', attrs={
        'src': 'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
    no_stylesheets = True
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
                          (re.compile('<table width.*?</table>',
                                      re.DOTALL), lambda m: ''),
                          ]

    def print_version(self, url):
        """Map both article URL flavours onto their print-view counterparts."""
        if url.find('news/article.php') >= 0:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/news/print.php?storyid", url)
        else:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/wfsection/print.php?articleid", url)

    def parse_index(self):
        """Scrape the front page and return one feed for the newest date."""
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        articles = {}
        for a in soup.findAll('a', attrs={'target': '_cnd'}):
            url = a['href']
            if url.find('article.php') < 0:
                continue
            if url.startswith('/'):
                url = 'http://cnd.org' + url
            title = self.tag_to_string(a)
            self.log('\tFound article: ', title, 'at', url)
            date = a.nextSibling
            # BUG FIX: test for a missing sibling before the regex; the
            # original ran re.search on it first, raising TypeError when
            # the anchor has no trailing text node.
            if date is None:
                continue
            # Entries marked 'cm' belong to the weekly edition; skip them.
            if re.search('cm', date):
                continue
            if len(date) > 2:
                if date not in articles:
                    articles[date] = []
                articles[date].append(
                    {'title': title, 'url': url, 'description': '', 'date': ''})
                self.log('\t\tAppend to : ', date)
        # BUG FIX: guard the empty case; sorted([]).pop() raised IndexError
        # when no dated article was found.
        if articles:
            mostCurrent = sorted(articles).pop()
            self.title = 'CND ' + mostCurrent
            feeds.append((self.title, articles[mostCurrent]))
        return feeds

    def populate_article_metadata(self, article, soup, first):
        # Debug logging only; no metadata is actually modified here.
        header = soup.find('h3')
        self.log('header: ' + self.tag_to_string(header))

View File

@ -1,74 +0,0 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2010, Derek Liang <Derek.liang.ca @@@at@@@ gmail.com>'
'''
cnd.org
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TheCND(BasicNewsRecipe):
    """Weekly ('cm'-dated) articles from CND (cnd.org), one feed per date."""

    title = 'CND Weekly'
    __author__ = 'Derek Liang'
    description = ''
    INDEX = 'http://cnd.org'
    language = 'zh'
    conversion_options = {'linearize_tables': True}

    remove_tags_before = dict(name='div', id='articleHead')
    remove_tags_after = dict(id='copyright')
    remove_tags = [dict(name='table', attrs={'align': 'right'}), dict(name='img', attrs={
        'src': 'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
    no_stylesheets = True
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
                          (re.compile('<table width.*?</table>',
                                      re.DOTALL), lambda m: ''),
                          ]

    def print_version(self, url):
        """Map both article URL flavours onto their print-view counterparts."""
        if url.find('news/article.php') >= 0:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/news/print.php?storyid", url)
        else:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/wfsection/print.php?articleid", url)

    def parse_index(self):
        """Scrape the front page; keep only weekly ('cm') entries, one feed
        per date, newest first."""
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        articles = {}
        for a in soup.findAll('a', attrs={'target': '_cnd'}):
            url = a['href']
            if url.find('article.php') < 0:
                continue
            if url.startswith('/'):
                url = 'http://cnd.org' + url
            title = self.tag_to_string(a)
            date = a.nextSibling
            # BUG FIX: test for a missing sibling before the regex; the
            # original ran re.search on it first, raising TypeError when
            # the anchor has no trailing text node.
            if date is None:
                continue
            # Only 'cm'-marked entries are part of the weekly edition.
            if not re.search('cm', date):
                continue
            self.log('\tFound article: ', title, 'at', url, '@', date)
            if len(date) > 2:
                if date not in articles:
                    articles[date] = []
                articles[date].append(
                    {'title': title, 'url': url, 'description': '', 'date': ''})
                self.log('\t\tAppend to : ', date)
        sorted_articles = sorted(articles)
        # Pop from the end of the sorted keys so feeds run newest-first.
        while sorted_articles:
            mostCurrent = sorted_articles.pop()
            # NOTE(review): self.title is reassigned on every iteration and
            # ends up naming the oldest batch — confirm this is intended.
            self.title = 'CND ' + mostCurrent
            feeds.append((self.title, articles[mostCurrent]))
        return feeds

    def populate_article_metadata(self, article, soup, first):
        # Debug logging only; no metadata is actually modified here.
        header = soup.find('h3')
        self.log('header: ' + self.tag_to_string(header))

View File

@ -1,76 +0,0 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
dnevniavaz.ba
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag via whichever BeautifulSoup API *soup* supports."""
    # Modern BeautifulSoup exposes a new_tag factory on the soup object;
    # fall back to constructing a Tag directly for the legacy API.
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class DnevniAvaz(BasicNewsRecipe):
    """Bosnian daily 'Dnevni Avaz' (latest and most-popular feeds)."""

    title = 'Dnevni Avaz'
    __author__ = 'Darko Miletic'
    description = 'Latest news from Bosnia'
    publisher = 'Dnevni Avaz'
    category = 'news, politics, Bosnia and Herzegovina'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_javascript = True
    cover_url = 'http://www.dnevniavaz.ba/img/logo.gif'
    lang = 'bs-BA'
    language = 'bs'
    direction = 'ltr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'  # noqa

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True
    }

    # Normalise U+0110 (Dj) to U+00D0 before parsing.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    # Title, lead, date, body and author containers only.
    keep_only_tags = [dict(name='div', attrs={'id': [
        'fullarticle-title', 'fullarticle-leading', 'fullarticle-date', 'fullarticle-text', 'articleauthor']})]
    remove_tags = [dict(name=['object', 'link', 'base'])]

    feeds = [
        (u'Najnovije', u'http://www.dnevniavaz.ba/rss/novo'),
        (u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno'),
    ]

    def replace_tagname(self, soup, tagname, tagid, newtagname):
        """Rename the element matching (tagname, id=tagid), when present."""
        target = soup.find(tagname, attrs={'id': tagid})
        if target:
            target.name = newtagname

    def preprocess_html(self, soup):
        """Stamp language/charset metadata and promote the title, lead and
        date containers to real heading elements."""
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mlang = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Language"), ("content", self.lang)])
        mcharset = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
        soup.head.insert(0, mlang)
        soup.head.insert(1, mcharset)
        for div_id, heading in (('fullarticle-title', 'h1'),
                                ('fullarticle-leading', 'h3'),
                                ('fullarticle-date', 'h5')):
            self.replace_tagname(soup, 'div', div_id, heading)
        return self.adeify_images(soup)

View File

@ -1,73 +0,0 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '30 October 2010, Jordi Balcells based on an earlier recipe by Darko Miletic <darko.miletic at gmail.com>'
'''
elperiodico.cat
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag via whichever BeautifulSoup API *soup* supports."""
    # Prefer the modern soup.new_tag factory; fall back to the legacy
    # Tag constructor when it is not available.
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class ElPeriodico_cat(BasicNewsRecipe):
    """Catalan-language edition of El Periodico de Catalunya."""

    title = 'El Periodico de Catalunya'
    __author__ = 'Jordi Balcells/Darko Miletic'
    description = 'Noticies des de Catalunya'
    publisher = 'elperiodico.cat'
    category = 'news, politics, Spain, Catalunya'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    delay = 1
    encoding = 'cp1252'
    language = 'ca'

    html2lrf_options = [
        '--comment', description, '--category', category, '--publisher', publisher
    ]
    html2epub_options = 'publisher="' + publisher + \
        '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [
        (u'Portada', u'http://www.elperiodico.cat/ca/rss/rss_portada.xml'),
        (u'Internacional', u'http://www.elperiodico.cat/ca/rss/internacional/rss.xml'),
        (u'Societat', u'http://www.elperiodico.cat/ca/rss/societat/rss.xml'),
        (u'Ci\xe8ncia i tecnologia',
         u'http://www.elperiodico.cat/ca/rss/ciencia-i-tecnologia/rss.xml'),
        (u'Esports', u'http://www.elperiodico.cat/ca/rss/esports/rss.xml'),
        (u'Gent', u'http://www.elperiodico.cat/ca/rss/gent/rss.xml'),
        (u'Opini\xf3', u'http://www.elperiodico.cat/ca/rss/opinio/rss.xml'),
        (u'Pol\xedtica', u'http://www.elperiodico.cat/ca/rss/politica/rss.xml'),
        (u'Barcelona', u'http://www.elperiodico.cat/ca/rss/barcelona/rss.xml'),
        (u'Economia', u'http://www.elperiodico.cat/ca/rss/economia/rss.xml'),
        (u'Cultura i espectacles',
         u'http://www.elperiodico.cat/ca/rss/cultura-i-espectacles/rss.xml'),
        (u'Tele', u'http://www.elperiodico.cat/ca/rss/tele/rss.xml'),
    ]

    # Headline and full-article containers; drop sharing/sidebar chrome.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'titularnoticia'}},
        {'name': 'div', 'attrs': {'class': 'noticia_completa'}},
    ]
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['opcionb', 'opcionb last', 'columna_noticia']}},
        {'name': 'span', 'attrs': {'class': 'opcionesnoticia'}},
    ]

    def print_version(self, url):
        """Point calibre at the printer-friendly page."""
        return url.replace('/default.asp?', '/print.asp?')

    def preprocess_html(self, soup):
        """Declare the charset explicitly and strip inline styling."""
        mcharset = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
        soup.head.insert(0, mcharset)
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

View File

@ -1,28 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Escrevinhador(BasicNewsRecipe):
    """Posts from Rodrigo Viana's 'Escrevinhador' blog."""

    title = 'Blog Escrevinhador'
    __author__ = 'Diniz Bortolotto'
    description = 'Posts do Blog Escrevinhador'
    publisher = 'Rodrigo Viana'
    category = 'news, politics, Brazil'
    language = 'pt_BR'
    publication_type = 'news and politics portal'
    oldest_article = 5
    max_articles_per_feed = 20
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    # Present posts oldest-first instead of the feed's newest-first order.
    reverse_article_order = True

    feeds = [(u'Blog Escrevinhador', u'http://www.rodrigovianna.com.br/feed')]

    # Cut everything after the post body; drop header, tag list and
    # social-sharing widgets.
    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'text'}}]
    remove_tags = [
        {'id': 'header'},
        {'name': 'p', 'attrs': {'class': 'tags'}},
        {'name': 'div', 'attrs': {'class': 'sociable'}},
    ]

View File

@ -1,49 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class IDGNow(BasicNewsRecipe):
    """Brazilian technology portal IDG Now!, via its Feedburner feeds."""

    title = 'IDG Now!'
    __author__ = 'Diniz Bortolotto'
    description = 'Posts do IDG Now!'
    oldest_article = 7
    max_articles_per_feed = 20
    encoding = 'utf8'
    publisher = 'Now!Digital Business Ltda.'
    category = 'technology, telecom, IT, Brazil'
    language = 'pt_BR'
    publication_type = 'technology portal'
    use_embedded_content = False
    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'

    def get_article_url(self, article):
        """Decode the Feedburner-obfuscated link and return the print view.

        Feedburner encodes the real URL in the last-but-one path segment
        using '0X' escape codes; each code is mapped back to the character
        it stands for before the target URL is extracted.
        """
        link = article.get('link', None)
        if link is None:
            return article
        if link.split('/')[-1] == "story01.htm":
            link = link.split('/')[-2]
            # IDIOM FIX: iterate the code/char pairs with zip instead of
            # indexing two parallel lists via range(len(...)).
            # Order matters: multi-char codes ('0L0S', '0J3A') must be
            # replaced in this sequence, before/after their prefixes.
            encodings = ['0B', '0C', '0D', '0E', '0F', '0G',
                         '0I', '0N', '0L0S', '0A', '0J3A']
            characters = ['.', '/', '?', '-', '=', '&', '_', '.com', 'www.', '0', ':']
            for code, char in zip(encodings, characters):
                link = link.replace(code, char)
            link = link.split('&')[-3]
            link = link.split('=')[1]
            link = link + "/IDGNoticiaPrint_view"
        return link

    feeds = [
        (u'Ultimas noticias', u'http://rss.idgnow.com.br/c/32184/f/499640/index.rss'),
        (u'Computa\xe7\xe3o Corporativa',
         u'http://rss.idgnow.com.br/c/32184/f/499643/index.rss'),
        (u'Carreira', u'http://rss.idgnow.com.br/c/32184/f/499644/index.rss'),
        (u'Computa\xe7\xe3o Pessoal',
         u'http://rss.idgnow.com.br/c/32184/f/499645/index.rss'),
        (u'Internet', u'http://rss.idgnow.com.br/c/32184/f/499646/index.rss'),
        (u'Mercado', u'http://rss.idgnow.com.br/c/32184/f/419982/index.rss'),
        (u'Seguran\xe7a',
         u'http://rss.idgnow.com.br/c/32184/f/499647/index.rss'),
        (u'Telecom e Redes',
         u'http://rss.idgnow.com.br/c/32184/f/499648/index.rss')
    ]
    reverse_article_order = True

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,106 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
from lxml import html
__license__ = 'GPL v3'
class Nfcmag(BasicNewsRecipe):
    # Recipe for the current issue of Nan Feng Chuang / South Reviews.
    __author__ = '2014, Chen Wei <weichen302@gmx.com>'
    title = 'Nan Feng Chuang / South Reviews Magazine'
    description = '''
South Reviews Magazine, established in 1985, is a Guangzhou-based political and
economic biweekly. South Reviews enjoys a reputation of being fair and objective, with graceful
narration, insightful expression among its readers, mostly government
officials, economic leaders and intellectuals. It has been praised as “the No.1
Political& Economical Magazine in China”.
The US magazine Time described South Reviews as "a highbrow news magazine".
Other international media organizations such as BBC and NHK have conducted
tracking shots of South Reviews journalists, to record their unique value
special position in Chinas media industry. Harvard-Yenching Library, Stanford
University's East Asia Library and UC Berkeley Library have collections of the
magazine since its first issue, taking them as an important source to
understand China's economic and social reform.
Since 2008, South Reviews has been committed to transforming into a
research-based media organization. Most of its editors, reporters and
contributors have remarkably strong academic backgrounds, coming from Peking
University, Tsinghua University, London School of Economics and Political
Science, the Chinese University of Hong Kong, Renmin University of China, and
other well-known institutions. The magazine has established research divisions,
including the State Policy Research Center and the Brand Promotion Research
Center, working in cooperation with well-known academic institutions and
providing valuable research reports for governments and companies.
'''
    language = 'zh'
    encoding = 'UTF-8'
    publisher = 'South Reviews Magazine'
    publication_type = 'magazine'
    category = 'news, Business, China'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = False

    # Site chrome: sidebars, breadcrumbs, scores, nav, header/footer.
    remove_tags = [dict(attrs={'class': ['side-left', 'side-right',
                   'breadcrumbs', 'score', 'weboNav']}),
                   dict(attrs={'id': ['header', 'footer']}),
                   dict(name=['script', 'noscript', 'style'])]
    no_stylesheets = True
    remove_javascript = True
    # Filled in as a side effect of parse_index(); read by get_cover_url().
    current_issue_url = ""
    current_issue_cover = ""

    def parse_index(self):
        """Find the latest issue, record its cover, and build one feed per
        magazine section (article URLs rewritten to their print views)."""
        baseurl = 'http://www.nfcmag.com/'
        raw = self.index_to_soup('http://www.nfcmag.com/magazine', raw=True)
        soup_start = html.fromstring(raw)
        # The first plain link (no id, no image child) inside the
        # 'lastest-magazine' box points to the newest issue.
        els = soup_start.xpath("""//div[contains(@class, 'lastest-magazine')
                                  and contains(@class, 'comBox')]
                                  //a[@href and not(@id) and not(child::img)]
                               """)
        for x in els:
            issueurl = x.get('href')
            if not issueurl.lower().startswith('http://'):
                issueurl = baseurl + issueurl
            break
        # NOTE(review): issueurl is unbound if the xpath matches nothing —
        # assumes the magazine page always lists the latest issue; confirm.
        raw = self.index_to_soup(issueurl, raw=True)
        soup_issue = html.fromstring(raw)
        # First image in the same box is the issue cover.
        coverimg = soup_issue.xpath("""//div[contains(@class, 'lastest-magazine')
                                       and contains(@class, 'comBox')]
                                       //img[@*] """)
        imgurl = coverimg[0].get('src')
        if not imgurl.lower().startswith('http://'):
            imgurl = baseurl + imgurl
        self.current_issue_cover = imgurl
        feeds = []
        # Each 'article-box' holds one section: <h4> heading, <h5> entries.
        sections = soup_issue.xpath("""//div[contains(@class, 'article-box')
                                       and contains(@class, 'comBox')] """)
        for sec in sections:
            pages = sec.xpath('.//h5')
            sec_title = sec.xpath('.//h4')[0].text_content()
            self.log('Found section:', sec_title)
            articles = []
            for x in pages:
                url = x.xpath('.//a')[0].get('href')
                if not url.lower().startswith('http://'):
                    url = baseurl + url
                url = url[:-5] + '-s.html'  # to print view
                title = x.text_content()
                articles.append({'title': title, 'url': url, 'date': None})
            if articles:
                feeds.append((sec_title, articles))
        return feeds

    def get_cover_url(self):
        """Return the cover image discovered during parse_index()."""
        return self.current_issue_cover

View File

@ -1,43 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class PortalR7(BasicNewsRecipe):
    """News from the Brazilian portal R7 (Rede Record)."""

    title = 'Noticias R7'
    __author__ = 'Diniz Bortolotto'
    description = 'Noticias Portal R7'
    publisher = 'Rede Record'
    category = 'news, Brazil'
    language = 'pt_BR'
    publication_type = 'newsportal'
    encoding = 'utf8'
    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']
    # Present articles oldest-first instead of the feed's newest-first order.
    reverse_article_order = True

    feeds = [
        (u'Brasil', u'http://www.r7.com/data/rss/brasil.xml'),
        (u'Economia', u'http://www.r7.com/data/rss/economia.xml'),
        (u'Internacional',
         u'http://www.r7.com/data/rss/internacional.xml'),
        (u'Tecnologia e Ci\xeancia',
         u'http://www.r7.com/data/rss/tecnologiaCiencia.xml'),
    ]

    # Keep the article container; drop share boxes, controls and banners.
    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'materia'}}]
    remove_tags = [
        {'id': ['espalhe', 'report-erro']},
        {'name': 'ul', 'attrs': {'class': 'controles'}},
        {'name': 'ul', 'attrs': {'class': 'relacionados'}},
        {'name': 'div', 'attrs': {'class': 'materia_banner'}},
        {'name': 'div', 'attrs': {'class': 'materia_controles'}},
    ]

    # Collapse everything between the article container and its header.
    preprocess_regexps = [
        (re.compile(r'<div class="materia">.*<div class="materia_cabecalho">', re.DOTALL | re.IGNORECASE),
         lambda match: '<div class="materia"><div class="materia_cabecalho">')
    ]

View File

@ -1,24 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class NoticiasUnB(BasicNewsRecipe):
    """News from the University of Brasilia (UnB Agência feed)."""

    title = 'Noticias UnB'
    __author__ = 'Diniz Bortolotto'
    description = 'Noticias da UnB'
    category = 'news, educational, Brazil'
    language = 'pt_BR'
    publication_type = 'newsportal'
    oldest_article = 5
    max_articles_per_feed = 20
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    # Present articles oldest-first instead of the feed's newest-first order.
    reverse_article_order = True

    feeds = [(u'UnB Agência', u'http://www.unb.br/noticias/rss/noticias.rss')]

    def print_version(self, url):
        """Rewrite an article URL to the print/e-mail rendition."""
        prefix = 'http://www.unb.br/noticias/print_email/imprimir.php?u=http://'
        return url.replace('http://', prefix)