From a862073a0092775de284b2de5572f72b0a2bb0d6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 29 Sep 2011 08:36:30 -0600 Subject: [PATCH 1/9] Fix #862381 (SMH Recipe downloads extraneous "Video feedback" form (fix attached)) --- recipes/smh.recipe | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/recipes/smh.recipe b/recipes/smh.recipe index 220cd7faf3..e59e9f21d4 100644 --- a/recipes/smh.recipe +++ b/recipes/smh.recipe @@ -22,10 +22,10 @@ class Smh_au(BasicNewsRecipe): remove_empty_feeds = True masthead_url = 'http://images.smh.com.au/2010/02/02/1087188/smh-620.jpg' publication_type = 'newspaper' - extra_css = """ - h1{font-family: Georgia,"Times New Roman",Times,serif } - body{font-family: Arial,Helvetica,sans-serif} - .cT-imageLandscape,.cT-imagePortrait{font-size: x-small} + extra_css = """ + h1{font-family: Georgia,"Times New Roman",Times,serif } + body{font-family: Arial,Helvetica,sans-serif} + .cT-imageLandscape,.cT-imagePortrait{font-size: x-small} """ conversion_options = { @@ -35,16 +35,16 @@ class Smh_au(BasicNewsRecipe): , 'language' : language } - remove_tags = [ - dict(name='div', attrs={'id':['googleAds','moreGoogleAds','comments']}) - ,dict(name='div', attrs={'class':'cT-imageMultimedia'}) - ,dict(name=['object','embed','iframe']) - ] remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})] keep_only_tags = [dict(name='div',attrs={'id':'content'})] - remove_tags = [ - dict(attrs={'class':'hidden'}), - dict(name=['link','meta','base','embed','object','iframe']) + remove_tags = [ + dict(name='div', + attrs={'id':['googleAds','moreGoogleAds','comments', + 'video-player-content']}), + dict(name='div', attrs={'class':'cT-imageMultimedia'}), + dict(name=['object','embed','iframe']), + dict(attrs={'class':'hidden'}), + dict(name=['link','meta','base','embed','object','iframe']) ] remove_attributes = ['width','height','lang'] @@ -84,4 +84,4 @@ class Smh_au(BasicNewsRecipe): if not item.has_key('alt'): item['alt'] = 'image' return soup - \ No newline at end of file + From 81228a0e6ba0369f9889270c4cbfb76c90a8056e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 29 Sep 2011 08:39:32 -0600 Subject: [PATCH 2/9] Fix #862175 (Need support for iRobot 7" A9 android tablet please) --- src/calibre/devices/android/driver.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index e62af0c5a0..583d4a786b 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -134,7 +134,7 @@ class ANDROID(USBMS): VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER', 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS', 'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA', - 'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT'] + 'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON'] WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', @@ -144,11 +144,12 @@ class ANDROID(USBMS): '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2', 'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK', 'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612', - 'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A', 'ALPANDIGITAL'] + 'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A', + 'ALPANDIGITAL', 'ANDROID_MID'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', - '__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL'] + '__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL', 'ANDROID_MID'] OSX_MAIN_MEM = 'Android Device Main Memory' From e07dffe09e9411e577578fec735d40b4147d80d7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 29 Sep 2011 09:45:52 -0600 Subject: [PATCH 3/9] Fix Title Sort field not being displayed in Book details panel --- src/calibre/gui2/book_details.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py index a070b24986..e8968b7cf5 100644 --- a/src/calibre/gui2/book_details.py +++ b/src/calibre/gui2/book_details.py @@ -85,6 +85,8 @@ def render_data(mi, use_roman_numbers=True, all_fields=False): for field, display in get_field_list(fm): metadata = fm.get(field, None) + if field == 'sort': + field = 'title_sort' if all_fields: display = True if (not display or not metadata or mi.is_null(field) or From 1ff5c05aaad4c1ca1a8526267b1b5e5bc9451b87 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 29 Sep 2011 09:54:35 -0600 Subject: [PATCH 4/9] Gosc Niedzielny by Piotr Kontek --- recipes/gosc_niedzielny.recipe | 112 +++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 recipes/gosc_niedzielny.recipe diff --git a/recipes/gosc_niedzielny.recipe b/recipes/gosc_niedzielny.recipe new file mode 100644 index 0000000000..12942f0f8a --- /dev/null +++ b/recipes/gosc_niedzielny.recipe @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ptempfile import PersistentTemporaryFile +import re + +class GN(BasicNewsRecipe): + EDITION = 0 + + __author__ = 'Piotr Kontek' + title = u'Gość niedzielny' + description = 'Weekly magazine' + encoding = 'utf-8' + no_stylesheets = True + language = 'pl' + remove_javascript = True + temp_files = [] + + articles_are_obfuscated = True + + def get_obfuscated_article(self, url): + br = self.get_browser() + br.open(url) + source = br.response().read() + page = self.index_to_soup(source) + + main_section = page.find('div',attrs={'class':'txt doc_prnt_prv'}) + + title = main_section.find('h2') + info = main_section.find('div', attrs={'class' : 'cf doc_info'}) + authors = info.find(attrs={'class':'l'}) + article = str(main_section.find('p', attrs={'class' : 'doc_lead'})) + first = True + for p in main_section.findAll('p', attrs={'class':None}, recursive=False): + if first and p.find('img') != None: + article = article + '

' + article = article + str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/') + article = article + '' + for s in p.findAll('span'): + article = article + self.tag_to_string(s) + article = article + '

' + else: + article = article + str(p).replace('src="/files/','src="http://www.gosc.pl/files/') + first = False + + html = unicode(title) + unicode(authors) + unicode(article) + + self.temp_files.append(PersistentTemporaryFile('_temparse.html')) + self.temp_files[-1].write(html) + self.temp_files[-1].close() + return self.temp_files[-1].name + + def find_last_issue(self): + soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny') + #szukam zdjęcia i linka do porzedniego pełnego numeru + first = True + for d in soup.findAll('div', attrs={'class':'l release_preview_l'}): + img = d.find('img') + if img != None: + a = img.parent + self.EDITION = a['href'] + self.title = img['alt'] + self.cover_url = 'http://www.gosc.pl' + img['src'] + if not first: + break + first = False + + def parse_index(self): + self.find_last_issue() + soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION) + feeds = [] + #wstepniak + a = soup.find('div',attrs={'class':'release-wp-b'}).find('a') + articles = [ + {'title' : self.tag_to_string(a), + 'url' : 'http://www.gosc.pl' + a['href'].replace('/doc/','/doc_pr/'), + 'date' : '', + 'description' : ''} + ] + feeds.append((u'Wstępniak',articles)) + #kategorie + for addr in soup.findAll('a',attrs={'href':re.compile('kategoria')}): + if addr.string != u'wszystkie artyku\u0142y z tej kategorii \xbb': + main_block = self.index_to_soup('http://www.gosc.pl' + addr['href']) + articles = list(self.find_articles(main_block)) + if len(articles) > 0: + section = addr.string + feeds.append((section, articles)) + return feeds + + def find_articles(self, main_block): + for a in main_block.findAll('div', attrs={'class':'prev_doc2'}): + art = a.find('a') + yield { + 'title' : self.tag_to_string(art), + 'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'), + 'date' : '', + 'description' : '' + } + for a in main_block.findAll('div', attrs={'class':'sr-document'}): + art = a.find('a') + yield { + 'title' : self.tag_to_string(art), + 'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'), + 'date' : '', + 'description' : '' + } + From d27fc5a06e94df4736b0cfed24e7c3d8541656c6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 29 Sep 2011 09:58:39 -0600 Subject: [PATCH 5/9] Fix American Spectator --- recipes/amspec.recipe | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/recipes/amspec.recipe b/recipes/amspec.recipe index e5a76a4f86..684b28cf5c 100644 --- a/recipes/amspec.recipe +++ b/recipes/amspec.recipe @@ -18,25 +18,16 @@ class TheAmericanSpectator(BasicNewsRecipe): use_embedded_content = False language = 'en' INDEX = 'http://spectator.org' - - conversion_options = { + auto_cleanup = True + encoding = 'utf-8' + + conversion_options = { 'comments' : description ,'tags' : category ,'language' : language ,'publisher' : publisher } - keep_only_tags = [ - dict(name='div', attrs={'class':'post inner'}) - ,dict(name='div', attrs={'class':'author-bio'}) - ] - - remove_tags = [ - dict(name='object') - ,dict(name='div', attrs={'class':['col3','post-options','social']}) - ,dict(name='p' , attrs={'class':['letter-editor','meta']}) - ] - feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')] def get_cover_url(self): @@ -48,10 +39,10 @@ class TheAmericanSpectator(BasicNewsRecipe): link_item2 = soup2.find('div',attrs={'class':'post inner issues'}) cover_url = self.INDEX + link_item2.img['src'] return cover_url - + def print_version(self, url): return url + '/print' - + def get_article_url(self, article): return article.get('guid', None) - + From 59750cbd8f444b7c2338f022175aba794783958d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 29 Sep 2011 11:21:30 -0600 Subject: [PATCH 6/9] Fix #854408 (Need support for Vizio Android Tablet (VTAB1008)) --- src/calibre/devices/android/driver.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 583d4a786b..eb867b80d3 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -77,8 +77,12 @@ class ANDROID(USBMS): 0xdeed : [0x0222], }, - # Viewsonic - 0x0489 : { 0xc001 : [0x0226], 0xc004 : [0x0226], }, + # Viewsonic/Vizio + 0x0489 : { + 0xc001 : [0x0226], + 0xc004 : [0x0226], + 0x8801 : [0x0226, 0x0227], + }, # Acer 0x502 : { 0x3203 : [0x0100, 0x224]}, @@ -134,7 +138,7 @@ class ANDROID(USBMS): VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER', 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS', 'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA', - 'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON'] + 'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'VIZIO'] WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', @@ -145,7 +149,7 @@ class ANDROID(USBMS): 'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK', 'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612', 'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A', - 'ALPANDIGITAL', 'ANDROID_MID'] + 'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', From 18860f49708018b9ca1be91bb850a599d9fffa10 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 29 Sep 2011 15:11:02 -0600 Subject: [PATCH 7/9] Fix #862741 (Updated recipe for readitlater) --- recipes/readitlater.recipe | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 4bd8fc2bd6..ea9c92868b 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -1,5 +1,8 @@ __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = ''' +2010, Darko Miletic +2011, Przemyslaw Kryger +''' ''' readitlaterlist.com ''' @@ -9,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Readitlater(BasicNewsRecipe): title = 'Read It Later' - __author__ = 'Darko Miletic' + __author__ = 'Darko Miletic, Przemyslaw Kryger' description = '''Personalized news feeds. Go to readitlaterlist.com to setup up your news. Fill in your account username, and optionally you can add password.''' @@ -23,9 +26,6 @@ class Readitlater(BasicNewsRecipe): INDEX = u'http://readitlaterlist.com' LOGIN = INDEX + u'/l' - - feeds = [(u'Unread articles' , INDEX + u'/unread')] - def get_browser(self): br = BasicNewsRecipe.get_browser() if self.username is not None: @@ -37,12 +37,31 @@ class Readitlater(BasicNewsRecipe): br.submit() return br + def get_feeds(self): + self.report_progress(0, ('Fetching list of feeds...')) + lfeeds = [] + i = 1 + feedurl = self.INDEX + u'/unread/1' + while True: + title = u'Unread articles, page ' + str(i) + lfeeds.append((title, feedurl)) + self.report_progress(0, ('Got ') + str(i) + (' feeds')) + i += 1 + soup = self.index_to_soup(feedurl) + ritem = soup.find('a',attrs={'id':'next', 'class':'active'}) + if ritem is None: + break + feedurl = self.INDEX + ritem['href'] + if self.test: + return lfeeds[:2] + return lfeeds + def parse_index(self): totalfeeds = [] lfeeds = self.get_feeds() for feedobj in lfeeds: feedtitle, feedurl = feedobj - self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) articles = [] soup = self.index_to_soup(feedurl) ritem = soup.find('ul',attrs={'id':'list'}) From 0772f7d62bfeb30ce42268cc6c416bd69ebb433e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 29 Sep 2011 21:19:10 -0600 Subject: [PATCH 8/9] Various Colombian news sources by BIGO-CAVA --- recipes/diario_la_republica.recipe | 11 ++++++ recipes/el_colombiano.recipe | 4 +-- recipes/el_espectador.recipe | 54 ++++++++++++++++++++++++++++++ recipes/el_mundo_co.recipe | 50 +++++++++++++++++++++++++++ recipes/el_tiempo.recipe | 9 +++-- recipes/portafolio.recipe | 4 +-- recipes/revista_semana.recipe | 11 ++++++ 7 files changed, 133 insertions(+), 10 deletions(-) create mode 100644 recipes/diario_la_republica.recipe create mode 100644 recipes/el_espectador.recipe create mode 100644 recipes/el_mundo_co.recipe create mode 100644 recipes/revista_semana.recipe diff --git a/recipes/diario_la_republica.recipe b/recipes/diario_la_republica.recipe new file mode 100644 index 0000000000..1f0e0d565b --- /dev/null +++ b/recipes/diario_la_republica.recipe @@ -0,0 +1,11 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1317341449(BasicNewsRecipe): + title = u'Diario La Republica' + __author__ = 'CAVALENCIA' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + language = 'es_CO' + + feeds = [(u'Diario La Republica', u'http://www.larepublica.com.co/rss/larepublica.xml')] diff --git a/recipes/el_colombiano.recipe b/recipes/el_colombiano.recipe index 492229dc4f..7a413529bb 100644 --- a/recipes/el_colombiano.recipe +++ b/recipes/el_colombiano.recipe @@ -2,12 +2,10 @@ from calibre.web.feeds.news import BasicNewsRecipe - - class AdvancedUserRecipe1311790237(BasicNewsRecipe): title = u'Periódico El Colombiano' - language = 'es_CO' __author__ = 'BIGO-CAVA' + language = 'es_CO' cover_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif' remove_tags_before = dict(id='contenidoArt') remove_tags_after = dict(id='enviaTips') diff --git a/recipes/el_espectador.recipe b/recipes/el_espectador.recipe new file mode 100644 index 0000000000..b7187cbdf3 --- /dev/null +++ b/recipes/el_espectador.recipe @@ -0,0 +1,54 @@ +# coding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + +class ColombiaElEspectador(BasicNewsRecipe): + title = u'Periódico el Espectador' + __author__ = 'BIGO-CAVA' + cover_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif' + #remove_tags_before = dict(id='fb-root') + remove_tags_before = dict(id='content') + remove_tags_after = [dict(name='div', attrs={'class':'paginacion'})] + language = 'es_CO' + #keep_only_tags = [dict(name='div', id='content')] + remove_tags = [dict(name='div', attrs={'class':'herramientas_nota'}), + dict(name='div', attrs={'class':'relpauta'}), + dict(name='div', attrs={'class':'recursosrelacionados'}), + dict(name='div', attrs={'class':'nav_negocios'})] + # dict(name='div', attrs={'class':'tags_playerrecurso'}), + # dict(name='div', attrs={'class':'ico-mail2'}), + # dict(name='div', attrs={'id':'caja-instapaper'}), + # dict(name='div', attrs={'class':'modulo herramientas'})] + oldest_article = 2 + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + remove_empty_feeds = True + masthead_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif' + publication_type = 'newspaper' + + extra_css = """ + p{text-align: justify; font-size: 100%} + body{ text-align: left; font-size:100% } + h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } + h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; } + """ + + + feeds = [(u'Política ', u' http://www.elespectador.com/noticias/politica/feed'), + (u'Judicial', u'http://www.elespectador.com/noticias/judicial/feed'), + (u'Paz', u'http://www.elespectador.com/noticias/paz/feed'), + (u'Economía', u'http://www.elespectador.com/economia/feed'), + (u'Soy Periodista', u'http://www.elespectador.com/noticias/soyperiodista/feed'), + (u'Investigación', u'http://www.elespectador.com/noticias/investigacion/feed'), + (u'Educación', u'http://www.elespectador.com/noticias/educacion/feed'), + (u'Salud', u'http://www.elespectador.com/noticias/salud/feed'), + (u'El Mundo', u'http://www.elespectador.com/noticias/elmundo/feed'), + (u'Nacional', u'http://www.elespectador.com/noticias/nacional/feed'), + (u'Bogotá', u'http://www.elespectador.com/noticias/bogota/feed'), + (u'Deportes', u'http://www.elespectador.com/deportes/feed'), + (u'Tecnología', u'http://www.elespectador.com/tecnologia/feed'), + (u'Actualidad', u'http://www.elespectador.com/noticias/actualidad/feed'), + (u'Opinión', u'http://www.elespectador.com/opinion/feed'), + (u'Editorial', u'http://www.elespectador.com/opinion/editorial/feed')] diff --git a/recipes/el_mundo_co.recipe b/recipes/el_mundo_co.recipe new file mode 100644 index 0000000000..a8186673d5 --- /dev/null +++ b/recipes/el_mundo_co.recipe @@ -0,0 +1,50 @@ + +from calibre.web.feeds.news import BasicNewsRecipe + +class ColombiaElMundo02(BasicNewsRecipe): + title = u'Periódico El Mundo' + __author__ = 'BIGO-CAVA' + language = 'es_CO' + cover_url = 'http://www.elmundo.com/portal/img/logo_mundo2.png' + remove_tags_before = dict(id='miga_pan') + #remove_tags_before = [dict(name='div', attrs={'class':'contenido'})] + remove_tags_after = [dict(name='div', attrs={'class':'cuadro_opciones_new1'})] + #keep_only_tags = [dict(name='div', id='miga_pan')] + remove_tags = [dict(name='div', attrs={'class':'ruta'}), + dict(name='div', attrs={'class':'buscador'}), + dict(name='div', attrs={'class':'iconos'}), + dict(name='div', attrs={'class':'otros_iconos'}), + dict(name='div', attrs={'class':'cuadro_opciones_new1'}), + dict(name='div', attrs={'class':'otras_noticias'}), + dict(name='div', attrs={'class':'notas_relacionadas'}), + dict(name='div', attrs={'id':'lateral_2'})] + oldest_article = 2 + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + remove_empty_feeds = True + masthead_url = 'http://www.elmundo.com/portal/img/logo_mundo2.png' + publication_type = 'newspaper' + + extra_css = """ + p{text-align: justify; font-size: 100%} + body{ text-align: left; font-size:100% } + h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } + h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; } + """ + + + feeds = [(u'Opinión', u'http://www.elmundo.com/images/rss/opinion.xml'), + (u'Economía', u'http://www.elmundo.com/images/rss/noticias_economia.xml'), + (u'Deportes', u'http://www.elmundo.com/images/rss/deportes.xml'), + (u'Política ', u'http://www.elmundo.com/images/rss/noticias_politica.xml'), + (u'Antioquia', u'http://www.elmundo.com/images/rss/noticias_antioquia.xml'), + (u'Nacional ', u'http://www.elmundo.com/images/rss/noticias_nacional.xml'), + (u'Internacional', u'http://www.elmundo.com/images/rss/noticias_internacional.xml'), + (u'Servicios Públicos', u'http://www.elmundo.com/images/rss/noticias_servicios_publicos.xml'), + (u'Infraestructura', u'http://www.elmundo.com/images/rss/noticias_infraestructura.xml'), + (u'Mobilidad', u'http://www.elmundo.com/images/rss/noticias_movilidad.xml'), + (u'Derechos Humanos', u'http://www.elmundo.com/images/rss/noticias_derechos_humanos.xml'), + (u'Vida', u'http://www.elmundo.com/images/rss/vida.xml'), + (u'Cultura', u'http://www.elmundo.com/images/rss/cultura.xml')] diff --git a/recipes/el_tiempo.recipe b/recipes/el_tiempo.recipe index 3a10cca767..794e75df7d 100644 --- a/recipes/el_tiempo.recipe +++ b/recipes/el_tiempo.recipe @@ -2,18 +2,17 @@ from calibre.web.feeds.news import BasicNewsRecipe - - - class ColombiaElTiempo02(BasicNewsRecipe): title = u'Periódico el Tiempo' - language = 'es_CO' __author__ = 'BIGO-CAVA' + language = 'es_CO' cover_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png' - remove_tags_before = dict(id='fb-root') + #remove_tags_before = dict(id='fb-root') + remove_tags_before = dict(id='contenidoArt') remove_tags_after = [dict(name='div', attrs={'class':'modulo reporte'})] keep_only_tags = [dict(name='div', id='contenidoArt')] remove_tags = [dict(name='div', attrs={'class':'social-media'}), + dict(name='div', attrs={'class':'recomend-art'}), dict(name='div', attrs={'class':'caja-facebook'}), dict(name='div', attrs={'class':'caja-twitter'}), dict(name='div', attrs={'class':'caja-buzz'}), diff --git a/recipes/portafolio.recipe b/recipes/portafolio.recipe index 1b442f68f7..4ba0d16c46 100644 --- a/recipes/portafolio.recipe +++ b/recipes/portafolio.recipe @@ -4,13 +4,13 @@ from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1311799898(BasicNewsRecipe): title = u'Periódico Portafolio Colombia' - language = 'es_CO' __author__ = 'BIGO-CAVA' + language = 'es_CO' cover_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png' remove_tags_before = dict(id='contenidoArt') remove_tags_after = [dict(name='div', attrs={'class':'articulo-mas'})] keep_only_tags = [dict(name='div', id='contenidoArt')] - oldest_article = 1 + oldest_article = 2 max_articles_per_feed = 100 remove_javascript = True no_stylesheets = True diff --git a/recipes/revista_semana.recipe b/recipes/revista_semana.recipe new file mode 100644 index 0000000000..1137764932 --- /dev/null +++ b/recipes/revista_semana.recipe @@ -0,0 +1,11 @@ +from calibre.web.feeds.news import BasicNewsRecipe + + +class AdvancedUserRecipe1317341570(BasicNewsRecipe): + title = u'Revista Semana' + __author__ = 'BIGO-CAVA' + language = 'es_CO' + oldest_article = 7 + max_articles_per_feed = 100 + + feeds = [(u'Revista Semana', u'http://www.semana.com/rss/Semana_OnLine.xml')] From d44734d9fe5c7140b5e6ee614575d6782e96f48a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 29 Sep 2011 22:10:40 -0600 Subject: [PATCH 9/9] ... --- src/calibre/ebooks/lrf/objects.py | 36 +++++++++++++++++-------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/calibre/ebooks/lrf/objects.py b/src/calibre/ebooks/lrf/objects.py index a83794b7b5..012871d0e7 100644 --- a/src/calibre/ebooks/lrf/objects.py +++ b/src/calibre/ebooks/lrf/objects.py @@ -452,24 +452,26 @@ class BlockAttr(StyleObject, LRFObject): @classmethod def to_css(cls, obj, inline=False): ans = '' + def item(line): - ans += '' if inline else '\t' + ans = '' if inline else '\t' ans += line ans += ' ' if inline else '\n' + return ans if hasattr(obj, 'sidemargin'): margin = str(obj.sidemargin) + 'px' - item('margin-left: %(m)s; margin-right: %(m)s;'%dict(m=margin)) + ans += item('margin-left: %(m)s; margin-right: %(m)s;'%dict(m=margin)) if hasattr(obj, 'topskip'): - item('margin-top: %dpx;'%obj.topskip) + ans += item('margin-top: %dpx;'%obj.topskip) if hasattr(obj, 'footskip'): - item('margin-bottom: %dpx;'%obj.footskip) + ans += item('margin-bottom: %dpx;'%obj.footskip) if hasattr(obj, 'framewidth'): - item('border: solid %dpx'%obj.framewidth) + ans += item('border: solid %dpx'%obj.framewidth) if hasattr(obj, 'framecolor') and obj.framecolor.a < 255: - item('border-color: %s;'%obj.framecolor.to_html()) + ans += item('border-color: %s;'%obj.framecolor.to_html()) if hasattr(obj, 'bgcolor') and obj.bgcolor.a < 255: - item('background-color: %s;'%obj.bgcolor.to_html()) + ans += item('background-color: %s;'%obj.bgcolor.to_html()) return ans @@ -480,39 +482,41 @@ class TextCSS(object): @classmethod def to_css(cls, obj, inline=False): ans = '' + def item(line): - ans += '' if inline else '\t' + ans = '' if inline else '\t' ans += line ans += ' ' if inline else '\n' + return ans fs = getattr(obj, 'fontsize', None) if fs is not None: - item('font-size: %fpt;'%(int(fs)/10.)) + ans += item('font-size: %fpt;'%(int(fs)/10.)) fw = getattr(obj, 'fontweight', None) if fw is not None: - item('font-weight: %s;'%('bold' if int(fw) >= 700 else 'normal')) + ans += item('font-weight: %s;'%('bold' if int(fw) >= 700 else 'normal')) fn = getattr(obj, 'fontfacename', None) if fn is not None: fn = cls.FONT_MAP[fn] - item('font-family: %s;'%fn) + ans += item('font-family: %s;'%fn) fg = getattr(obj, 'textcolor', None) if fg is not None: fg = fg.to_html() - item('color: %s;'%fg) + ans += item('color: %s;'%fg) bg = getattr(obj, 'textbgcolor', None) if bg is not None: bg = bg.to_html() - item('background-color: %s;'%bg) + ans += item('background-color: %s;'%bg) al = getattr(obj, 'align', None) if al is not None: al = dict(head='left', center='center', foot='right') - item('text-align: %s;'%al) + ans += item('text-align: %s;'%al) lh = getattr(obj, 'linespace', None) if lh is not None: - item('text-align: %fpt;'%(int(lh)/10.)) + ans += item('text-align: %fpt;'%(int(lh)/10.)) pi = getattr(obj, 'parindent', None) if pi is not None: - item('text-indent: %fpt;'%(int(pi)/10.)) + ans += item('text-indent: %fpt;'%(int(pi)/10.)) return ans