diff --git a/recipes/eluniversalimpresa.recipe b/recipes/eluniversalimpresa.recipe deleted file mode 100644 index 331da13a86..0000000000 --- a/recipes/eluniversalimpresa.recipe +++ /dev/null @@ -1,86 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class ElUniversalImpresaRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'es_MX' - version = 1 - - title = u'El Universal (Edici\u00F3n Impresa)' - publisher = u'El Universal' - category = u'News, Mexico' - description = u'News from Mexico' - - remove_empty_feeds = True - remove_javascript = True - - INDEX = 'http://www.eluniversal.com.mx' - - extra_css = ''' - body{font-family:verdana,arial,helvetica,geneva,sans-serif;} - ''' - - conversion_options = {'comments': description, 'tags': category, 'language': 'en', - 'publisher': publisher, 'linearize_tables': True} - - def parse_index(self): - soup = self.index_to_soup( - 'http://www.eluniversal.com.mx/edicion_impresa.html') - index = [] - - table = soup.find('table', attrs={'width': '500'}) - articles = [] - for td in table.findAll('td', attrs={'class': 'arnegro12'}): - a = td.a - a.extract() - title = self.tag_to_string(a) - url = self.INDEX + a['href'] - description = self.tag_to_string(td) - articles.append({'title': title, 'date': None, - 'url': url, 'description': description}) - - index.append(('Primera Plana', articles)) - - for td in table.findAll(lambda tag: tag.name == 'td' and len(tag.attrs) == 0): - articles = [] - feedTitle = None - for a in td.findAll('a'): - if not feedTitle: - feedTitle = self.tag_to_string(a) - continue - - title = self.tag_to_string(a) - - url = self.INDEX + a['href'] - articles.append({'title': title, 'date': None, - 'url': url, 'description': ''}) - - index.append((feedTitle, articles)) - - return index - - def print_version(self, url): - if url.find('wcarton') >= 0: - return None - - main, sep, id = url.rpartition('/') - - return main + '/vi_' + id - - def preprocess_html(self, soup): - table = soup.find('table') - table.extract() - - for p in soup.findAll('p'): - if self.tag_to_string(p).strip() == '': - p.extract() - - tag = soup.find('font', attrs={'color': '#0F046A'}) - if tag: - for attr in ['color', 'face', 'helvetica,', 'sans-serif', 'size']: - tag[attr] = '' - del tag[attr] - tag.name = 'h1' - - return soup diff --git a/recipes/la_jornada.recipe b/recipes/la_jornada.recipe index 99147871ea..12bb968064 100644 --- a/recipes/la_jornada.recipe +++ b/recipes/la_jornada.recipe @@ -13,7 +13,6 @@ try: from urllib.parse import urlparse, urlunparse, parse_qs except ImportError: from urlparse import urlparse, urlunparse, parse_qs -from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -30,7 +29,6 @@ class LaJornada_mx(BasicNewsRecipe): use_embedded_content = False language = 'es_MX' remove_empty_feeds = True - cover_url = strftime("http://www.jornada.unam.mx/%Y/%m/%d/portada.pdf") masthead_url = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png' publication_type = 'newspaper' extra_css = """ diff --git a/recipes/milenio.recipe b/recipes/milenio.recipe deleted file mode 100644 index aabc707434..0000000000 --- a/recipes/milenio.recipe +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python2 -__license__ = 'GPL v3' -__copyright__ = '2010, Brendan Sleight ' -''' -impreso.milenio.com -''' -from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe - -import datetime - - -class Milenio(BasicNewsRecipe): - title = u'Milenio-diario' - __author__ = 'Bmsleight' - language = 'es_MX' - description = 'Milenio-diario' - oldest_article = 10 - max_articles_per_feed = 100 - no_stylesheets = False - index = 'http://impreso.milenio.com' - - keep_only_tags = [ - dict(name='div', attrs={'class': 'content'}) - ] - - def parse_index(self): - # "%m/%d/%Y" - # http://impreso.milenio.com/Nacional/2010/09/01/ - totalfeeds = [] - soup = self.index_to_soup( - self.index + "/Nacional/" + datetime.date.today().strftime("%Y/%m/%d")) - maincontent = soup.find('div', attrs={'class': 'content'}) - mfeed = [] - if maincontent: - for itt in maincontent.findAll('a', href=True): - if "/node/" in str(itt['href']): - url = self.index + itt['href'] - title = self.tag_to_string(itt) - description = '' - date = strftime(self.timefmt) - mfeed.append({ - 'title': title, 'date': date, 'url': url, 'description': description - }) - totalfeeds.append(('Articles', mfeed)) - return totalfeeds