diff --git a/Changelog.yaml b/Changelog.yaml
index c5eadc5e65..a25b52bfbf 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -19,6 +19,117 @@
# new recipes:
# - title:
+- version: 0.7.49
+ date: 2011-03-11
+
+ new features:
+ - title: "News download: More flexible news download scheduling. You can now schedule by days of the week, days of the month and an interval, which can be as small as an hour for news sources that change rapidly"
+
+ - title: "Improved support for dragging and dropping cover images directly from web browsers into calibre."
+ description: >
+ "You can drop the images onto the cover in calibre and it will be replaced. Tested on a number of OS/browser combinations, but I am sure there are still a few for which it won't work."
+
+ - title: "Add shortcuts of Alt+Left and Alt+Right for the next and previous buttons in the edit metadata dialog."
+ tickets: [9360]
+
+ - title: "When adding a GUI plugin, prompt the user for where the plugin should be displayed"
+
+ - title: "Conversion: When using the Level x Table of Contents options, support the case when the level 1,2,3 items are spread over multiple HTML files."
+
+ - title: "Support for the Optimus V"
+
+ - title: "FB2 Input: Support for tables"
+ tickets: [9302]
+
+ - title: "Display a checkmark/cross next to 'true' and 'false' items in custom columns. Controlled via Preferences->Add a custom column"
+
+ - title: "Catalog generation: Reuse cover from existing catalog, allows the use of a custom cover for catalogs"
+
+ - title: "When setting covers in calibre, resize to fit within a maximum size of (1200, 1600), to prevent slowdowns due to extra large covers. This size can be controlled via Preferences->Tweaks."
+ tickets: [9277]
+
+ bug fixes:
+ - title: "Fix long standing bug that caused errors when saving books to disk if the book metadata has certain Chinese/Russian characters on Windows. The fix required some changes to how unicode paths are handled in calibre, so it might have broken something else. If so, please open a ticket."
+ tickets: [7250]
+
+ - title: "Custom recipes: Store custom recipes in the calibre config directory instead of the library database. This allows scheduling of custom recipes to work with multiple libraries. Note that you may have to re-schedule any existing custom recipes."
+
+ - title: "Restore the ability to do search and replace on ISBN. Use the 'identifiers' field with type isbn to do this"
+
+ - title: "Fix amazon metadata download plugin not working with ISBN-13 and social metadata not downloading if the supplied ISBN 10 is not for an edition available on Amazon"
+
+ - title: "Workaround for openlibrary blocking the user agent used by calibre, preventing cover downloads from that site"
+
+ - title: "FB2 Output: Add sequence to metadata. Fix bugs with author names. Fix bug where elements were put inside tags."
+
+ - title: "Conversion pipeline: If the input HTML document uses uppercase tag and attribute names, convert them to lowercase"
+
+ - title: "RTF Input: Fix space after unicode quote character being incorrectly removed"
+ tickets: [9343]
+
+ - title: "Fix regression that broke the ebook-device command line program in the previous release"
+
+ - title: "Fix custom columns with numbers not allowing entry of positive numbers on 64-bit machines"
+ tickets: [9283]
+
+ - title: "Fix regression that caused focus to be lost when editing metadata in the device view"
+ tickets: [9323]
+
+ - title: "CHM Input: If an input encoding is specified, use it rather than trying to detect the encoding of the text in the CHM file."
+ tickets: [9173]
+
+ - title: "Fix regression that caused the viewer to forget its window size and other attributes when launched from within calibre, after calibre is restarted."
+ tickets: [9326]
+
+ - title: "News download: Fix regression that caused the delay parameter in recipes to not actually delay downloads."
+ tickets: [9332]
+
+ - title: "Conversion pipeline: When converting the :first-letter pseudo CSS selector, follow W3C rules for handling leading punctuation characters."
+ tickets: [9319]
+
+ - title: "Fix regression that caused clicking saved searches in the Tag Browser to not work"
+
+ - title: "Comic Input: Fix conversion failing when output profile is set to Tablet Output"
+
+ - title: "Replace leading periods in all path components generated by calibre with underscores"
+
+ - title: "Search and replace preferences: Prevent very long strings from causing the wizard button to get pushed off the screen"
+
+ - title: "Content server: Fix regression that caused various metadata to be missing in the book details view."
+ tickets: [8929]
+
+ - title: "Apple driver: Ignore invalid EPUBs when sending to iTunes"
+
+ improved recipes:
+ - golem.de
+ - gulli.de
+ - La Nacion
+ - Ming Pao
+ - evz.ro
+ - Kompiuterra
+ - NRC Handelsblad (EPUB)
+ - The Leduc - Wetaskiwin Pipestone Flyer
+
+ new recipes:
+ - title: "Various Romanian news sources"
+ author: Silviu Cotoara
+
+ - title: "Salt Lake City Tribune"
+ author: Charles Holbert
+
+ - title: "Bay Citizen and Oakland North"
+ author: noah
+
+ - title: "Nikkei Business and JB Press"
+ author: Ado Nishimura
+
+ - title: "El Pais Babelia"
+ author: oneillpt
+
+ - title: "Komchadluek"
+ author: ballsai
+
+
- version: 0.7.48
date: 2011-03-04
diff --git a/resources/images/news/avantaje.png b/resources/images/news/avantaje.png
new file mode 100644
index 0000000000..79f83c2665
Binary files /dev/null and b/resources/images/news/avantaje.png differ
diff --git a/resources/images/news/cotidianul.png b/resources/images/news/cotidianul.png
new file mode 100644
index 0000000000..2e57dbde54
Binary files /dev/null and b/resources/images/news/cotidianul.png differ
diff --git a/resources/images/news/ele.png b/resources/images/news/ele.png
new file mode 100644
index 0000000000..82f66b5caa
Binary files /dev/null and b/resources/images/news/ele.png differ
diff --git a/resources/images/news/felicia.png b/resources/images/news/felicia.png
new file mode 100644
index 0000000000..4bc1fd35d8
Binary files /dev/null and b/resources/images/news/felicia.png differ
diff --git a/resources/images/news/financiarul.png b/resources/images/news/financiarul.png
new file mode 100644
index 0000000000..1d91a72a34
Binary files /dev/null and b/resources/images/news/financiarul.png differ
diff --git a/resources/images/news/hitro.png b/resources/images/news/hitro.png
new file mode 100644
index 0000000000..75c08a1c25
Binary files /dev/null and b/resources/images/news/hitro.png differ
diff --git a/resources/images/news/imperatortravel.png b/resources/images/news/imperatortravel.png
new file mode 100644
index 0000000000..c459759ed0
Binary files /dev/null and b/resources/images/news/imperatortravel.png differ
diff --git a/resources/images/news/kamikaze.png b/resources/images/news/kamikaze.png
new file mode 100644
index 0000000000..49ef2f50a1
Binary files /dev/null and b/resources/images/news/kamikaze.png differ
diff --git a/resources/images/news/kompiutierra.png b/resources/images/news/kompiutierra.png
new file mode 100644
index 0000000000..272e3d905f
Binary files /dev/null and b/resources/images/news/kompiutierra.png differ
diff --git a/resources/images/news/monden.png b/resources/images/news/monden.png
new file mode 100644
index 0000000000..fcf8ad42ae
Binary files /dev/null and b/resources/images/news/monden.png differ
diff --git a/resources/images/news/onemagazine.png b/resources/images/news/onemagazine.png
new file mode 100644
index 0000000000..61ce048c30
Binary files /dev/null and b/resources/images/news/onemagazine.png differ
diff --git a/resources/images/news/pcworldro.png b/resources/images/news/pcworldro.png
new file mode 100644
index 0000000000..532548302d
Binary files /dev/null and b/resources/images/news/pcworldro.png differ
diff --git a/resources/images/news/promotor.png b/resources/images/news/promotor.png
new file mode 100644
index 0000000000..a479cf135b
Binary files /dev/null and b/resources/images/news/promotor.png differ
diff --git a/resources/images/news/protvmagazin.png b/resources/images/news/protvmagazin.png
new file mode 100644
index 0000000000..8824760edb
Binary files /dev/null and b/resources/images/news/protvmagazin.png differ
diff --git a/resources/images/news/psychologies.png b/resources/images/news/psychologies.png
new file mode 100644
index 0000000000..027296dde2
Binary files /dev/null and b/resources/images/news/psychologies.png differ
diff --git a/resources/images/news/publika.png b/resources/images/news/publika.png
new file mode 100644
index 0000000000..696a9b42a0
Binary files /dev/null and b/resources/images/news/publika.png differ
diff --git a/resources/images/news/rbc_ru.png b/resources/images/news/rbc_ru.png
new file mode 100644
index 0000000000..46c5d3fdce
Binary files /dev/null and b/resources/images/news/rbc_ru.png differ
diff --git a/resources/images/news/timesnewroman.png b/resources/images/news/timesnewroman.png
new file mode 100644
index 0000000000..6ba02939b4
Binary files /dev/null and b/resources/images/news/timesnewroman.png differ
diff --git a/resources/images/news/trombon.png b/resources/images/news/trombon.png
new file mode 100644
index 0000000000..641b04f1b7
Binary files /dev/null and b/resources/images/news/trombon.png differ
diff --git a/resources/images/news/tvmania.png b/resources/images/news/tvmania.png
new file mode 100644
index 0000000000..022267f885
Binary files /dev/null and b/resources/images/news/tvmania.png differ
diff --git a/resources/images/news/viva.png b/resources/images/news/viva.png
new file mode 100644
index 0000000000..e01fc6125e
Binary files /dev/null and b/resources/images/news/viva.png differ
diff --git a/resources/images/news/wallstreetro.png b/resources/images/news/wallstreetro.png
new file mode 100644
index 0000000000..d72bc70ca0
Binary files /dev/null and b/resources/images/news/wallstreetro.png differ
diff --git a/resources/recipes/avantaje.recipe b/resources/recipes/avantaje.recipe
new file mode 100644
index 0000000000..9a48616fa1
--- /dev/null
+++ b/resources/recipes/avantaje.recipe
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+avantaje.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Avantaje(BasicNewsRecipe):
+    '''Recipe that builds an issue from the avantaje.ro FeedBurner feed.'''
+
+    # Descriptive metadata
+    title = u'Avantaje'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = u'Avantaje'
+    description = u''
+    category = 'Ziare,Reviste,Stiri'
+    language = 'ro'
+    cover_url = 'http://www.avantaje.ro/images/default/logo.gif'
+
+    # Fetch settings
+    oldest_article = 25
+    max_articles_per_feed = 100
+    encoding = 'utf-8'
+    no_stylesheets = True
+    use_embedded_content = False
+
+    # Conversion options are filled in from the metadata defined above
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    feeds = [(u'Feeds', u'http://feeds.feedburner.com/Avantaje')]
+
+    keep_only_tags = [
+        dict(name='div', attrs={'id': 'articol'}),
+        dict(name='div', attrs={'class': 'gallery clearfix'}),
+        dict(name='div', attrs={'align': 'justify'}),
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'id': ['color_sanatate_box']}),
+        dict(name='div', attrs={'class': ['nav']}),
+        dict(name='div', attrs={'class': ['voteaza_art']}),
+        dict(name='div', attrs={'class': ['bookmark']}),
+        dict(name='div', attrs={'class': ['links clearfix']}),
+        dict(name='div', attrs={'class': ['title']}),
+    ]
+
+    remove_tags_after = [dict(name='div', attrs={'class': ['title']})]
+
+    def preprocess_html(self, soup):
+        '''Return ``soup`` after running it through ``adeify_images``.'''
+        processed = self.adeify_images(soup)
+        return processed
diff --git a/resources/recipes/bay_citizen.recipe b/resources/recipes/bay_citizen.recipe
new file mode 100644
index 0000000000..e6a6c2b63d
--- /dev/null
+++ b/resources/recipes/bay_citizen.recipe
@@ -0,0 +1,46 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TheBayCitizen(BasicNewsRecipe):
+    '''
+    Recipe for The Bay Citizen (baycitizen.org).
+
+    Stories can be split over several pages; preprocess_html() pulls the
+    continuation pages in through append_page() so that each article ends
+    up as a single document.
+    '''
+    title = 'The Bay Citizen'
+    language = 'en'
+    __author__ = 'noah'
+    description = 'The Bay Citizen'
+    publisher = 'The Bay Citizen'
+    # Site root; continuation links found in the page are appended to it
+    INDEX = u'http://www.baycitizen.org'
+    category = 'news'
+    oldest_article = 2
+    max_articles_per_feed = 20
+    no_stylesheets = True
+    masthead_url = 'http://media.baycitizen.org/images/layout/logo1.png'
+    feeds = [('Main Feed', 'http://www.baycitizen.org/feeds/stories/')]
+    keep_only_tags = [dict(name='div', attrs={'class':'story'})]
+    remove_tags = [
+        dict(name='div', attrs={'class':'socialBar'}),
+        dict(name='div', attrs={'id':'text-resize'}),
+        dict(name='div', attrs={'class':'story relatedContent'}),
+        dict(name='div', attrs={'id':'comment_status_loading'}),
+    ]
+
+    def append_page(self, soup, appendtag, position):
+        '''
+        Append the continuation of a multi-page story.
+
+        Looks for the 'stry-next' link in ``soup``. If there is one, the
+        linked page is fetched, inline styles are stripped from its
+        'body' div, any further continuation pages are appended to that
+        div recursively, and the div is finally inserted into
+        ``appendtag`` at index ``position``. Does nothing when there is
+        no next-page link or the next page has no 'body' div.
+        '''
+        pager = soup.find('a', attrs={'class':'stry-next'})
+        if not pager:
+            return
+        nexturl = self.INDEX + pager['href']
+        soup2 = self.index_to_soup(nexturl)
+        texttag = soup2.find('div', attrs={'class':'body'})
+        if texttag is None:
+            # find() returns None when the div is missing; keep what has
+            # been collected so far instead of failing on None.findAll()
+            return
+        for styled in texttag.findAll(style=True):
+            del styled['style']
+        newpos = len(texttag.contents)
+        # Recurse first so later pages land at the end of this page's text
+        self.append_page(soup2, texttag, newpos)
+        texttag.extract()
+        appendtag.insert(position, texttag)
+
+    def preprocess_html(self, soup):
+        '''
+        Strip inline styles, merge continuation pages into ``soup`` and
+        remove the pagination elements. Returns the modified ``soup``.
+        '''
+        for item in soup.findAll(style=True):
+            del item['style']
+        self.append_page(soup, soup.body, 3)
+        # Plain loops: the extract() calls are wanted only for their effect
+        for trash in soup.findAll(id='story-pagination'):
+            trash.extract()
+        for trash in soup.findAll('em', 'cont-from-prev'):
+            trash.extract()
+        return soup
diff --git a/resources/recipes/cotidianul.recipe b/resources/recipes/cotidianul.recipe
new file mode 100644
index 0000000000..f00196532c
--- /dev/null
+++ b/resources/recipes/cotidianul.recipe
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+cotidianul.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Cotidianul(BasicNewsRecipe):
+    '''Recipe that builds an issue from the cotidianul.ro "ToateStirile" feed.'''
+
+    # Descriptive metadata
+    title = u'Cotidianul'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = u'Cotidianul'
+    description = u''
+    category = 'Ziare,Stiri'
+    language = 'ro'
+    cover_url = 'http://www.cotidianul.ro/images/cotidianul.png'
+
+    # Fetch settings
+    oldest_article = 25
+    max_articles_per_feed = 100
+    encoding = 'utf-8'
+    no_stylesheets = True
+    use_embedded_content = False
+
+    # Conversion options are filled in from the metadata defined above
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    feeds = [(u'Feeds', u'http://www.cotidianul.ro/rssfeed/ToateStirile.xml')]
+
+    # Stylesheet text is kept exactly as it was written
+    extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class': 'titlu'}),
+        dict(name='div', attrs={'class': 'gallery clearfix'}),
+        dict(name='div', attrs={'align': 'justify'}),
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class': ['space']}),
+        dict(name='div', attrs={'id': ['title_desc']}),
+    ]
+
+    remove_tags_after = [
+        dict(name='div', attrs={'class': ['space']}),
+        dict(name='span', attrs={'class': ['date']}),
+    ]
+
+    def preprocess_html(self, soup):
+        '''Return ``soup`` after running it through ``adeify_images``.'''
+        processed = self.adeify_images(soup)
+        return processed
diff --git a/resources/recipes/economist.recipe b/resources/recipes/economist.recipe
index 17bf4c8c20..9447fe2193 100644
--- a/resources/recipes/economist.recipe
+++ b/resources/recipes/economist.recipe
@@ -24,7 +24,7 @@ class Economist(BasicNewsRecipe):
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
- dict(attrs={'class':['dblClkTrk', 'ec-article-info']}),
+ dict(attrs={'class':['dblClkTrk', 'ec-article-info', 'share_inline_header']}),
{'class': lambda x: x and 'share-links-header' in x},
]
keep_only_tags = [dict(id='ec-article-body')]
diff --git a/resources/recipes/economist_free.recipe b/resources/recipes/economist_free.recipe
index f4a4efd932..d1766211d7 100644
--- a/resources/recipes/economist_free.recipe
+++ b/resources/recipes/economist_free.recipe
@@ -18,7 +18,8 @@ class Economist(BasicNewsRecipe):
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
- dict(attrs={'class':['dblClkTrk', 'ec-article-info']}),
+ dict(attrs={'class':['dblClkTrk', 'ec-article-info',
+ 'share_inline_header']}),
{'class': lambda x: x and 'share-links-header' in x},
]
keep_only_tags = [dict(id='ec-article-body')]
diff --git a/resources/recipes/el_pais_babelia.recipe b/resources/recipes/el_pais_babelia.recipe
new file mode 100644
index 0000000000..31b983ec0b
--- /dev/null
+++ b/resources/recipes/el_pais_babelia.recipe
@@ -0,0 +1,49 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ElPaisBabelia(BasicNewsRecipe):
+ # Recipe for the "Babelia" pages of El Pais. No feeds are declared; the
+ # article list is scraped from the INDEX page by parse_index() below.
+
+ title = 'El Pais Babelia'
+ __author__ = 'oneillpt'
+ description = 'El Pais Babelia'
+ INDEX = 'http://www.elpais.com/suple/babelia/'
+ language = 'es'
+
+ remove_tags_before = dict(name='div', attrs={'class':'estructura_2col'})
+ # NOTE(review): the attribute is spelled keep_tags here; keep_only_tags was
+ # probably intended, in which case this filter currently has no effect.
+ # Confirm against the BasicNewsRecipe API before renaming.
+ keep_tags = [dict(name='div', attrs={'class':'estructura_2col'})]
+ remove_tags = [dict(name='div', attrs={'class':'votos estirar'}),
+ dict(name='div', attrs={'id':'utilidades'}),
+ dict(name='div', attrs={'class':'info_relacionada'}),
+ dict(name='div', attrs={'class':'mod_apoyo'}),
+ dict(name='div', attrs={'class':'contorno_f'}),
+ dict(name='div', attrs={'class':'pestanias'}),
+ dict(name='div', attrs={'class':'otros_webs'}),
+ dict(name='div', attrs={'id':'pie'})
+ ]
+ #no_stylesheets = True
+ remove_javascript = True
+
+ # Build the index from the INDEX page: every div with class
+ # 'contenedor_nuevo' is a section titled by its h1, and links found inside
+ # it become {'title', 'url'} entries. Returns a list of
+ # (section_title, articles) tuples; sections without articles are left out.
+ def parse_index(self):
+ # NOTE(review): this first assignment looks redundant, articles is reset
+ # for every section below.
+ articles = []
+ soup = self.index_to_soup(self.INDEX)
+ feeds = []
+ for section in soup.findAll('div', attrs={'class':'contenedor_nuevo'}):
+ section_title = self.tag_to_string(section.find('h1'))
+ articles = []
+ for post in section.findAll('a', href=True):
+ url = post['href']
+ # Site-relative links are made absolute.
+ # NOTE(review): the prefix uses elpais.es while INDEX uses elpais.com;
+ # confirm both hosts serve the same pages.
+ if url.startswith('/'):
+ url = 'http://www.elpais.es'+url
+ title = self.tag_to_string(post)
+ # The link's markup is checked for a class attribute before
+ # post['class'] is read.
+ if str(post).find('class=') > 0:
+ klass = post['class']
+ if klass != "":
+ # Diagnostic output describing the link
+ self.log()
+ self.log('--> post: ', post)
+ self.log('--> url: ', url)
+ self.log('--> title: ', title)
+ self.log('--> class: ', klass)
+ articles.append({'title':title, 'url':url})
+ if articles:
+ feeds.append((section_title, articles))
+ return feeds
+
diff --git a/resources/recipes/ele.recipe b/resources/recipes/ele.recipe
new file mode 100644
index 0000000000..ea8954366b
--- /dev/null
+++ b/resources/recipes/ele.recipe
@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+ele.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Ele(BasicNewsRecipe):
+    '''Recipe that builds an issue from the ele.ro "must read" feed.'''
+
+    # Descriptive metadata
+    title = u'Ele'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = u'Ele'
+    description = u'Dezv\u0103luie ceea ce e\u015fti'
+    category = 'Ziare,Femei'
+    language = 'ro'
+    cover_url = 'http://www.tripmedia.ro/tripadmin/photos/logo_ele_mare.jpg'
+
+    # Fetch settings
+    oldest_article = 25
+    max_articles_per_feed = 100
+    encoding = 'utf-8'
+    no_stylesheets = True
+    use_embedded_content = False
+
+    # Conversion options are filled in from the metadata defined above
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    feeds = [(u'Feeds', u'http://www.ele.ro/rss_must_read')]
+
+    # Stylesheet text is kept exactly as it was written
+    extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
+
+    keep_only_tags = [
+        dict(name='h1', attrs={'class': 'article_title'}),
+        dict(name='div', attrs={'class': 'article_text'}),
+    ]
+
+    def preprocess_html(self, soup):
+        '''Return ``soup`` after running it through ``adeify_images``.'''
+        processed = self.adeify_images(soup)
+        return processed
diff --git a/resources/recipes/evz.ro.recipe b/resources/recipes/evz.ro.recipe
index bce151d1fc..841dc80429 100644
--- a/resources/recipes/evz.ro.recipe
+++ b/resources/recipes/evz.ro.recipe
@@ -1,52 +1,54 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
__license__ = 'GPL v3'
-__copyright__ = '2010, Darko Miletic '
+__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
evz.ro
'''
-import re
from calibre.web.feeds.news import BasicNewsRecipe
-class EVZ_Ro(BasicNewsRecipe):
- title = 'evz.ro'
- __author__ = 'Darko Miletic'
- description = 'News from Romania'
- publisher = 'evz.ro'
- category = 'news, politics, Romania'
- oldest_article = 2
- max_articles_per_feed = 200
- no_stylesheets = True
- encoding = 'utf8'
- use_embedded_content = False
+class EvenimentulZilei(BasicNewsRecipe):
+ # Recipe for Evenimentul Zilei (evz.ro); articles come from the site's
+ # rss.xml feed declared in `feeds` below.
+ title = u'Evenimentul Zilei'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = ''
+ publisher = u'Evenimentul Zilei'
+ oldest_article = 5
language = 'ro'
- masthead_url = 'http://www.evz.ro/fileadmin/images/logo.gif'
- extra_css = ' body{font-family: Georgia,Arial,Helvetica,sans-serif } .firstP{font-size: 1.125em} .author,.articleInfo{font-size: small} '
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ category = 'Ziare,Stiri'
+ encoding = 'utf-8'
+ cover_url = 'http://www.evz.ro/fileadmin/images/evzLogo.png'
+ # Conversion options are filled in from the attributes defined above.
conversion_options = {
- 'comment' : description
- , 'tags' : category
- , 'publisher' : publisher
- , 'language' : language
- }
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
- preprocess_regexps = [
- (re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '')
- ,(re.compile(r' .*?', re.DOTALL|re.IGNORECASE),lambda match: ' ')
- ]
+ # Elements selected from each article page: the 'single' div plus the
+ # 'placeholder' image and 'holderlink' anchor.
+ keep_only_tags = [
+ dict(name='div', attrs={'class':'single'})
+ , dict(name='img', attrs={'id':'placeholder'})
+ , dict(name='a', attrs={'id':'holderlink'})
+ ]
- remove_tags = [
- dict(name=['form','embed','iframe','object','base','link','script','noscript'])
- ,dict(attrs={'class':['section','statsInfo','email il']})
- ,dict(attrs={'id' :'gallery'})
- ]
+ # The article info paragraph and the two banner divs are removed.
+ remove_tags = [
+ dict(name='p', attrs={'class':['articleInfo']})
+ , dict(name='div', attrs={'id':['bannerAddoceansArticleJos']})
+ , dict(name='div', attrs={'id':['bannerAddoceansArticle']})
+ ]
- remove_tags_after = dict(attrs={'class':'section'})
- keep_only_tags = [dict(attrs={'class':'single'})]
- remove_attributes = ['height','width']
+ # NOTE(review): the same banner div is listed in remove_tags above;
+ # presumably it marks the end of the article text - confirm.
+ remove_tags_after = [
+ dict(name='div', attrs={'id':['bannerAddoceansArticleJos']})
+ ]
- feeds = [(u'Articles', u'http://www.evz.ro/rss.xml')]
+ feeds = [
+ (u'Feeds', u'http://www.evz.ro/rss.xml')
+ ]
+ # Returns whatever adeify_images() produces for the parsed page.
def preprocess_html(self, soup):
- for item in soup.findAll(style=True):
- del item['style']
- return soup
+ return self.adeify_images(soup)
diff --git a/resources/recipes/felicia.recipe b/resources/recipes/felicia.recipe
new file mode 100644
index 0000000000..0772e38494
--- /dev/null
+++ b/resources/recipes/felicia.recipe
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+revistafelicia.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Felicia(BasicNewsRecipe):
+    '''Recipe that builds an issue from the revistafelicia.ro RSS feed.'''
+
+    # Descriptive metadata
+    title = u'Revista Felicia'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = u'Revista Felicia'
+    description = u'O revist\u0103 pentru sufletul t\u0103u'
+    category = 'Ziare,Reviste'
+    language = 'ro'
+    cover_url = 'http://www.3waves.net/uploads/image/logo-revista-felicia_03.jpg'
+
+    # Fetch settings
+    oldest_article = 25
+    max_articles_per_feed = 100
+    encoding = 'utf-8'
+    no_stylesheets = True
+    use_embedded_content = False
+
+    # Conversion options are filled in from the metadata defined above
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    feeds = [(u'Feeds', u'http://www.revistafelicia.ro/rss')]
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class': 'header'}),
+        dict(name='div', attrs={'id': 'contentArticol'}),
+    ]
+
+    remove_tags = [
+        dict(
+            name='img',
+            attrs={'src': ['http://www.revistafelicia.ro/templates/default/images/hdr_ultimul_nr.jpg']},
+        ),
+        dict(name='div', attrs={'class': ['content']}),
+    ]
+
+    def preprocess_html(self, soup):
+        '''Return ``soup`` after running it through ``adeify_images``.'''
+        processed = self.adeify_images(soup)
+        return processed
diff --git a/resources/recipes/financiarul.recipe b/resources/recipes/financiarul.recipe
new file mode 100644
index 0000000000..807f771408
--- /dev/null
+++ b/resources/recipes/financiarul.recipe
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+financiarul.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Financiarul(BasicNewsRecipe):
+    '''Recipe that builds an issue from the financiarul.com RSS feed.'''
+
+    # Descriptive metadata
+    title = u'Financiarul'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = u'Financiarul'
+    description = u'FIN.ro'
+    category = 'Ziare,Stiri'
+    language = 'ro'
+    cover_url = 'http://www.financiarul.com/templates/default/images/logo.png'
+
+    # Fetch settings
+    oldest_article = 25
+    max_articles_per_feed = 100
+    encoding = 'utf-8'
+    no_stylesheets = True
+    use_embedded_content = False
+
+    # Conversion options are filled in from the metadata defined above
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    feeds = [(u'Feeds', u'http://www.financiarul.com/rss')]
+
+    keep_only_tags = [dict(name='div', attrs={'class': 'col2ContentLeftL'})]
+
+    remove_tags = [
+        dict(name='div', attrs={'class': ['infoArticol']}),
+        dict(name='ul', attrs={'class': 'navSectiuni'}),
+        dict(name='div', attrs={'class': 'separator separatorTop'}),
+        dict(name='div', attrs={'class': 'infoArticol infoArticolBottom'}),
+        dict(name='ul', attrs={'class': ['related']}),
+        dict(name='div', attrs={'class': ['slot panel300 panelGri300 panelGri300s panelGri300sm']}),
+    ]
+
+    remove_tags_after = [dict(name='ul', attrs={'class': ['related']})]
+
+    def preprocess_html(self, soup):
+        '''Return ``soup`` after running it through ``adeify_images``.'''
+        processed = self.adeify_images(soup)
+        return processed
diff --git a/resources/recipes/golem_de.recipe b/resources/recipes/golem_de.recipe
index d0280a9617..77374cb162 100644
--- a/resources/recipes/golem_de.recipe
+++ b/resources/recipes/golem_de.recipe
@@ -1,17 +1,83 @@
-from calibre.web.feeds.news import BasicNewsRecipe
+#!/usr/bin/env python
-class AdvancedUserRecipe1257093338(BasicNewsRecipe):
+from calibre.web.feeds.news import BasicNewsRecipe
+class golem_ger(BasicNewsRecipe):
+    '''
+    Recipe for the German news site Golem.de.
+
+    Articles are taken from the RSS/Atom feeds in ``feeds``; links that
+    match ``match_regexps`` are followed one level deep (``recursions``).
+    '''
     title = u'Golem.de'
     language = 'de'
     __author__ = 'Kovid Goyal'
     oldest_article = 7
     max_articles_per_feed = 100
+    lang = 'de-DE'
+    no_stylesheets = True
+    encoding = 'iso-8859-1'
+    recursions = 1
+    # Dots are escaped so that they only match a literal '.'
+    match_regexps = [r'http://www\.golem\.de/.*\.html']
- feeds = [(u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0')]
+    keep_only_tags = [
+        dict(name='h1', attrs={'class':'artikelhead'}),
+        dict(name='p', attrs={'class':'teaser'}),
+        dict(name='div', attrs={'class':'artikeltext'}),
+        dict(name='h2', attrs={'id':'artikelhead'}),
+    ]
- def print_version(self, url):
- murxb = url.rfind('/') + 1
- murxc = url[murxb :-5]
- murxa = 'http://www.golem.de/' + 'print.php?a=' + murxc
- return murxa
+
+    remove_tags = [
+        dict(name='div', attrs={'id':['similarContent','topContentWrapper','storycarousel','aboveFootPromo','comments','toolbar','breadcrumbs','commentlink','sidebar','rightColumn']}),
+        dict(name='div', attrs={'class':['gg_embeddedSubText','gg_embeddedIndex gg_solid','gg_toOldGallery','golemGallery']}),
+        dict(name='img', attrs={'class':['gg_embedded','gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer']}),
+        dict(name='td', attrs={'class':['xsmall']}),
+    ]
+
+    # remove_tags_after = [
+    #     dict(name='div', attrs={'id':['contentad2']})
+    # ]
+
+    # The feeds that are downloaded. ``feeds`` is assigned exactly once: a
+    # second assignment to the same name would silently replace the first.
+    feeds = [
+        (u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
+        (u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=RSS2.0'),
+        (u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
+        (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
+        (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
+        (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
+    ]
+
+    # Further topic feeds, currently disabled; move an entry into ``feeds``
+    # to enable it:
+    #     (u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
+    #     (u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
+    #     (u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
+    #     (u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
+    #     (u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
+    #     (u'Hardware', u'http://rss.golem.de/rss.php?r=hw&feed=RSS2.0'),
+    #     (u'Software', u'http://rss.golem.de/rss.php?r=sw&feed=RSS2.0'),
+    #     (u'Networld', u'http://rss.golem.de/rss.php?r=nw&feed=RSS2.0'),
+    #     (u'Entertainment', u'http://rss.golem.de/rss.php?r=et&feed=RSS2.0'),
+    #     (u'TK', u'http://rss.golem.de/rss.php?r=tk&feed=RSS2.0'),
+    #     (u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0'),
+    #     (u'Unternehmen/Maerkte', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0'),
+
+    # Stylesheet text is kept exactly as it was written
+    extra_css = '''
+ h1 {color:#0066CC;font-family:Arial,Helvetica,sans-serif; font-size:30px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;margin-bottom:2 em;}
+ h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:22px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+ h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:x-small; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal; line-height:5px;}
+ h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
+ h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
+ .teaser {font-style:italic;font-size:12pt;margin-bottom:15pt;}
+ .xsmall{font-style:italic;font-size:x-small;}
+ .td{font-style:italic;font-size:x-small;}
+ img {align:left;}
+ '''
diff --git a/resources/recipes/gulli.recipe b/resources/recipes/gulli.recipe
index e695aa02ef..8a861f527f 100644
--- a/resources/recipes/gulli.recipe
+++ b/resources/recipes/gulli.recipe
@@ -11,6 +11,26 @@ class AdvancedUserRecipe1259599587(BasicNewsRecipe):
feeds = [(u'gulli:news', u'http://ticker.gulli.com/rss/')]
- remove_tags = [{'class' : ['addthis_button', 'BreadCrumb']}, {'id' : ['plista0']}]
+ remove_tags = [dict(name='div', attrs={'class':['FloatL','_forumBox']})]
- keep_only_tags = [dict(name='div', attrs={'class':'inside'})]
+ keep_only_tags = [dict(name='div', attrs={'id':['_contentLeft']})]
+
+ remove_tags_after = [dict(name='div', attrs={'class':['_bookmark']})]
+
+
+
+
+
+ extra_css = '''
+ h1 {color:#008852;font-family:Arial,Helvetica,sans-serif; font-size:25px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:22px; }
+ h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+ h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
+ h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
+ h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
+ .newsdate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+ .articleInfo {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+ .byline {color:#666;margin-bottom:0;font-size:12px}
+ .blockquote {color:#030303;font-style:italic;padding-left:15px;}
+ img {align:center;}
+ .li {list-style-type: none}
+ '''
diff --git a/resources/recipes/hitro.recipe b/resources/recipes/hitro.recipe
new file mode 100644
index 0000000000..3a85847c81
--- /dev/null
+++ b/resources/recipes/hitro.recipe
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+hit.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Hit(BasicNewsRecipe):
+    '''Download recipe for hit.ro (language 'ro', described as IT), driven by the site's RSS feed.'''
+
+    # Identity and catalogue metadata
+    title       = u'HIT'
+    __author__  = u'Silviu Cotoar\u0103'
+    publisher   = 'HIT'
+    description = 'IT'
+    category    = 'Ziare,Reviste,IT'
+    language    = 'ro'
+    cover_url   = 'http://www.hit.ro/lib/images/frontend/hit_logo.png'
+
+    # Fetch settings
+    oldest_article        = 5
+    max_articles_per_feed = 100
+    use_embedded_content  = False
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+
+    feeds = [(u'Feeds', u'http://www.hit.ro/rss')]
+
+    # Only the headline element and the article container are kept.
+    keep_only_tags = [
+        {'name': 'h1', 'attrs': {'class': 'art_titl'}},
+        {'name': 'div', 'attrs': {'id': 'continut_articol'}},
+    ]
+
+    # Options dict assembled from the metadata attributes defined above.
+    conversion_options = dict(
+        comments=description,
+        tags=category,
+        language=language,
+        publisher=publisher
+    )
+
+    def preprocess_html(self, soup):
+        '''Hand the parsed page to the inherited adeify_images helper and return its result.'''
+        processed = self.adeify_images(soup)
+        return processed
diff --git a/resources/recipes/imperatortravel.recipe b/resources/recipes/imperatortravel.recipe
new file mode 100644
index 0000000000..2b6d323bf5
--- /dev/null
+++ b/resources/recipes/imperatortravel.recipe
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+imperatortravel.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Imperatortravel(BasicNewsRecipe):
+    '''Download recipe for Imperator Travel (language 'ro'), read from its FeedBurner feed.'''
+
+    # Identity and catalogue metadata
+    title       = u'Imperator Travel'
+    __author__  = u'Silviu Cotoar\u0103'
+    publisher   = u'Imperator Travel'
+    description = u'C\u0103l\u0103torii'
+    category    = 'Ziare,Stiri,Turism,Calatorii'
+    language    = 'ro'
+    cover_url   = 'http://www.imperatortravel.ro/images/header-1.jpg'
+
+    # Fetch settings
+    oldest_article        = 25
+    max_articles_per_feed = 100
+    use_embedded_content  = False
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+
+    feeds = [(u'Feeds', u'http://feeds.feedburner.com/ImperatorTravels')]
+
+    # Options dict assembled from the metadata attributes defined above.
+    conversion_options = dict(
+        comments=description,
+        tags=category,
+        language=language,
+        publisher=publisher
+    )
+
+    # Stylesheet text; the lines inside the literal are kept exactly as they were.
+    extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
+
+    # The article container is the only part of the page that is kept.
+    keep_only_tags = [
+        {'name': 'div', 'attrs': {'class': 'article first_main_article'}},
+    ]
+
+    # Elements dropped from the kept content.
+    remove_tags = [
+        {'name': 'div', 'attrs': {'class': ['meta']}},
+        {'name': 'body', 'attrs': {'class': ['transparent_widget ff3 win Locale_en_US']}},
+        {'name': 'div', 'attrs': {'class': ['connect_widget']}},
+        {'name': 'ul', 'attrs': {'class': ['similar-posts']}},
+    ]
+
+    remove_tags_after = [
+        {'name': 'ul', 'attrs': {'class': ['similar-posts']}},
+    ]
+
+    def preprocess_html(self, soup):
+        '''Hand the parsed page to the inherited adeify_images helper and return its result.'''
+        processed = self.adeify_images(soup)
+        return processed
diff --git a/resources/recipes/jbpress.recipe b/resources/recipes/jbpress.recipe
new file mode 100644
index 0000000000..acfb1c78d6
--- /dev/null
+++ b/resources/recipes/jbpress.recipe
@@ -0,0 +1,42 @@
+import urllib2
+from calibre.web.feeds.news import BasicNewsRecipe
+
+# News recipe for JBPress (language 'ja'). Articles come from the ismedia RDF feed
+# and are fetched from their '/print/' URLs; the recipe is flagged as needing a
+# subscription and logs in through get_browser() when credentials are set.
+class JBPress(BasicNewsRecipe):
+ title = u'JBPress'
+ language = 'ja'
+ description = u'Japan Business Press New articles (using small print version)'
+ __author__ = 'Ado Nishimura'
+ needs_subscription = True
+ oldest_article = 7
+ max_articles_per_feed = 100
+ remove_tags_before = dict(id='wrapper')
+ no_stylesheets = True
+
+ feeds = [('JBPress new article', 'http://feed.ismedia.jp/rss/jbpress/all.rdf')]
+
+
+ # Returns a fixed logo URL as the cover.
+ def get_cover_url(self):
+ return 'http://www.jbpress.co.jp/common/images/v1/jpn/common/logo.gif'
+
+ # Builds the browser. When both username and password are set it opens a print
+ # page, replaces the response body with the local `html` string, then selects
+ # form 0 in that replaced body, fills in login/password and submits it.
+ # NOTE(review): `html` holds only a newline here; the login form markup it
+ # presumably contained looks lost, and select_form(nr=0) needs a form - confirm.
+ def get_browser(self):
+ html = '''
'''
+ br = BasicNewsRecipe.get_browser()
+ if self.username is not None and self.password is not None:
+ br.open('http://jbpress.ismedia.jp/articles/print/5549')
+ response = br.response()
+ response.set_data(html)
+ br.set_response(response)
+ br.select_form(nr=0)
+ br["login"] = self.username
+ br['password'] = self.password
+ br.submit()
+ return br
+
+ # Follows redirects with a plain urllib2 request, then swaps '/-/' for '/print/'
+ # in the final URL.
+ # NOTE(review): the redirect lookup does not use the browser returned by
+ # get_browser() - confirm that an unauthenticated request is intended.
+ def print_version(self, url):
+ url = urllib2.urlopen(url).geturl() # resolve redirect.
+ return url.replace('/-/', '/print/')
diff --git a/resources/recipes/kamikaze.recipe b/resources/recipes/kamikaze.recipe
new file mode 100644
index 0000000000..1369cb6f85
--- /dev/null
+++ b/resources/recipes/kamikaze.recipe
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+kamikazeonline.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Kamikaze(BasicNewsRecipe):
+    '''Download recipe for kamikazeonline.ro (language 'ro'), driven by the site's feed.'''
+
+    # Identity and catalogue metadata
+    title       = u'Kamikaze'
+    __author__  = u'Silviu Cotoar\u0103'
+    publisher   = 'Kamikaze'
+    description = u'S\u0103pt\u0103m\u00e2nal sc\u0103pat de sub control'
+    category    = 'Ziare,Reviste'
+    language    = 'ro'
+    cover_url   = 'http://www.kamikazeonline.ro/wp-content/themes/kamikaze/images/kamikazeonline_header.gif'
+
+    # Fetch settings
+    oldest_article        = 5
+    max_articles_per_feed = 100
+    use_embedded_content  = False
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+
+    feeds = [(u'Feeds', u'http://www.kamikazeonline.ro/feed/')]
+
+    # Options dict assembled from the metadata attributes defined above.
+    conversion_options = dict(
+        comments=description,
+        tags=category,
+        language=language,
+        publisher=publisher
+    )
+
+    # The content container is the only part of the page that is kept.
+    keep_only_tags = [
+        {'name': 'div', 'attrs': {'id': 'content'}},
+    ]
+
+    # Elements dropped from the kept content.
+    remove_tags = [
+        {'name': 'div', 'attrs': {'class': ['connect_confirmation_cell connect_confirmation_cell_no_like']}},
+        {'name': 'h3', 'attrs': {'id': ['comments']}},
+        {'name': 'ul', 'attrs': {'class': ['addtoany_list']}},
+        {'name': 'p', 'attrs': {'class': ['postmetadata']}},
+    ]
+
+    remove_tags_after = [
+        {'name': 'p', 'attrs': {'class': ['postmetadata']}},
+    ]
+
+    def preprocess_html(self, soup):
+        '''Hand the parsed page to the inherited adeify_images helper and return its result.'''
+        processed = self.adeify_images(soup)
+        return processed
diff --git a/resources/recipes/komchadluek.recipe b/resources/recipes/komchadluek.recipe
new file mode 100644
index 0000000000..5f0d2f58a2
--- /dev/null
+++ b/resources/recipes/komchadluek.recipe
@@ -0,0 +1,46 @@
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class KomChadLuek(BasicNewsRecipe):
+    '''Download recipe for Komchadluek News (language 'th'), one RSS feed per site section.'''
+
+    title = 'KomChadLuek'
+    description = 'Komchadluek News'
+    __author__ = 'ballsaii and Chotechai'
+    __license__ = 'GPL v3'
+    publisher = 'Nation Media Group'
+    category = 'news, Thai'
+    language = 'th'
+
+    oldest_article = 1
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_javascript = True
+
+    cover_url = 'http://www.komchadluek.net/images_layout2/komchadluek_headerlogo.png'
+
+    # Keep h2 elements and the news detail container; written as one literal
+    # instead of an empty list followed by append() calls.
+    keep_only_tags = [
+        dict(name='h2'),
+        dict(name='div', attrs={'id': 'news_detail_news'}),
+    ]
+
+    remove_tags_after = [dict(name='hr')]
+
+    # (section title, feed URL) pairs; kept as a tuple, as before.
+    feeds = (
+        (u'\u0e01\u0e32\u0e23\u0e40\u0e21\u0e37\u0e2d\u0e07', 'http://www.komchadluek.net/rss/politic.xml'),
+        # The sport feed used to carry the same title as the foreign feed below;
+        # it is now labelled with the Thai word for sports.
+        (u'\u0e01\u0e35\u0e2c\u0e32', 'http://www.komchadluek.net/rss/sport.xml'),
+        (u'\u0e40\u0e01\u0e29\u0e15\u0e23', 'http://www.komchadluek.net/rss/agriculture.xml'),
+        (u'\u0e15\u0e48\u0e32\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28', 'http://www.komchadluek.net/rss/foreign.xml'),
+        (u'\u0e1a\u0e31\u0e19\u0e40\u0e17\u0e34\u0e07', 'http://www.komchadluek.net/rss/entertainment.xml'),
+        (u'\u0e1c\u0e39\u0e49\u0e2b\u0e0d\u0e34\u0e07-\u0e41\u0e1f\u0e0a\u0e31\u0e48\u0e19', 'http://www.komchadluek.net/rss/fashion.xml'),
+        (u'\u0e1e\u0e23\u0e30\u0e40\u0e04\u0e23\u0e37\u0e48\u0e2d\u0e07', 'http://www.komchadluek.net/rss/amulet.xml'),
+        (u'\u0e20\u0e39\u0e21\u0e34\u0e20\u0e32\u0e04-\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e04\u0e21\u0e17\u0e49\u0e2d\u0e07\u0e16\u0e34\u0e48\u0e19', 'http://www.komchadluek.net/rss/local.xml'),
+        (u'\u0e25\u0e38\u0e07\u0e41\u0e08\u0e48\u0e21', 'http://www.komchadluek.net/rss/unclecham.xml'),
+        (u'\u0e44\u0e25\u0e1f\u0e4c\u0e2a\u0e44\u0e15\u0e25\u0e4c', 'http://www.komchadluek.net/rss/lifestyle.xml'),
+        (u'\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01\u0e34\u0e08-\u0e01\u0e32\u0e23\u0e15\u0e25\u0e32\u0e14', 'http://www.komchadluek.net/rss/economic.xml'),
+        (u'\u0e2d\u0e32\u0e2b\u0e32\u0e23', 'http://www.komchadluek.net/rss/food.xml'),
+        (u'\u0e04\u0e19\u0e23\u0e31\u0e01\u0e1a\u0e49\u0e32\u0e19-\u0e22\u0e32\u0e19\u0e22\u0e19\u0e15\u0e4c', 'http://www.komchadluek.net/rss/homecar.xml'),
+        (u'\u0e14\u0e39\u0e14\u0e27\u0e07-\u0e42\u0e2b\u0e23\u0e32\u0e28\u0e32\u0e2a\u0e15\u0e23\u0e4c', 'http://www.komchadluek.net/rss/horoscope.xml'),
+        (u'\u0e27\u0e34\u0e17\u0e22\u0e4c\u0e28\u0e32\u0e2a\u0e15\u0e23\u0e4c-\u0e44\u0e2d\u0e17\u0e35', 'http://www.komchadluek.net/rss/scienceit.xml'),
+        (u'\u0e28\u0e32\u0e2a\u0e19\u0e32 \u0e28\u0e34\u0e25\u0e1b\u0e30-\u0e27\u0e31\u0e12\u0e19\u0e18\u0e23\u0e23\u0e21 \u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e2a\u0e38\u0e02', 'http://www.komchadluek.net/rss/artculture.xml'),
+        (u'\u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29\u0e32', 'http://www.komchadluek.net/rss/education.xml'),
+        (u'\u0e1a\u0e17\u0e04\u0e27\u0e32\u0e21', 'http://www.komchadluek.net/rss/article.xml'),
+        (u'\u0e2d\u0e32\u0e0a\u0e0d\u0e32\u0e01\u0e23\u0e23\u0e21', 'http://www.komchadluek.net/rss/crime.xml'),
+    )
diff --git a/resources/recipes/kompiutierra.recipe b/resources/recipes/kompiutierra.recipe
index 0d30afa3a7..a82db9aced 100644
--- a/resources/recipes/kompiutierra.recipe
+++ b/resources/recipes/kompiutierra.recipe
@@ -1,36 +1,37 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-__license__ = 'GPL v3'
-__copyright__ = '2010, Vadim Dyadkin, dyadkin@gmail.com'
-__author__ = 'Vadim Dyadkin'
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class Computerra(BasicNewsRecipe):
- title = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430'
- recursion = 50
- oldest_article = 100
- __author__ = 'Vadim Dyadkin'
- max_articles_per_feed = 100
- use_embedded_content = False
- simultaneous_downloads = 5
- language = 'ru'
- description = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u044b, \u043e\u043a\u043e\u043b\u043e\u043d\u0430\u0443\u0447\u043d\u044b\u0435 \u0438 \u043e\u043a\u043e\u043b\u043e\u0444\u0438\u043b\u043e\u0441\u043e\u0444\u0441\u043a\u0438\u0435 \u0441\u0442\u0430\u0442\u044c\u0438, \u0433\u0430\u0434\u0436\u0435\u0442\u044b.'
-
- keep_only_tags = [dict(name='div', attrs={'id': 'content'}),]
-
-
- feeds = [(u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430', 'http://feeds.feedburner.com/ct_news/'),]
-
- remove_tags = [dict(name='div', attrs={'id': ['fin', 'idc-container', 'idc-noscript',]}),
- dict(name='ul', attrs={'class': "related_post"}),
- dict(name='p', attrs={'class': 'info'}),
- dict(name='a', attrs={'rel': 'tag', 'class': 'twitter-share-button', 'type': 'button_count'}),
- dict(name='h2', attrs={}),]
-
- extra_css = 'body { text-align: justify; }'
-
- def get_article_url(self, article):
- return article.get('feedburner:origLink', article.get('guid'))
-
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Vadim Dyadkin, dyadkin@gmail.com'
+__author__ = 'Vadim Dyadkin'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Computerra(BasicNewsRecipe):
+    '''Download recipe for Computerra (language 'ru'), read from its FeedBurner feed.'''
+
+    # Identity
+    title       = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430'
+    __author__  = 'Vadim Dyadkin (edited by A. Chewi)'
+    language    = 'ru'
+    description = u'Компьютерра: все новости про компьютеры, железо, новые технологии, информационные технологии'
+
+    # Fetch settings
+    oldest_article         = 100
+    max_articles_per_feed  = 50
+    simultaneous_downloads = 5
+    use_embedded_content   = False
+    remove_javascript      = True
+    no_stylesheets         = True
+    conversion_options     = {'linearize_tables': True}
+
+    feeds = [
+        (u'Компьютерра-Онлайн', 'http://feeds.feedburner.com/ct_news/'),
+    ]
+
+    # The content container is the only part of the page that is kept.
+    keep_only_tags = [
+        {'name': 'div', 'attrs': {'id': 'content'}},
+    ]
+
+    # Elements dropped from the kept content; the two anchor rules match
+    # on class and on type independently.
+    remove_tags = [
+        {'name': 'div', 'attrs': {'id': ['fin', 'idc-container', 'idc-noscript']}},
+        {'name': 'ul', 'attrs': {'class': "related_post"}},
+        {'name': 'p', 'attrs': {'class': 'info'}},
+        {'name': 'a', 'attrs': {'class': 'twitter-share-button'}},
+        {'name': 'a', 'attrs': {'type': 'button_count'}},
+        {'name': 'h2', 'attrs': {}},
+    ]
+
+    def print_version(self, url):
+        '''Return the article URL with the '?print=true' suffix appended.'''
+        return '%s?print=true' % (url,)
diff --git a/resources/recipes/lanacion.recipe b/resources/recipes/lanacion.recipe
index 05e777ec67..cdee0e5e66 100644
--- a/resources/recipes/lanacion.recipe
+++ b/resources/recipes/lanacion.recipe
@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
-__copyright__ = '2008-2010, Darko Miletic '
+__copyright__ = '2008-2011, Darko Miletic '
'''
lanacion.com.ar
'''
@@ -17,14 +17,16 @@ class Lanacion(BasicNewsRecipe):
use_embedded_content = False
no_stylesheets = True
language = 'es_AR'
+ delay = 14
publication_type = 'newspaper'
remove_empty_feeds = True
- masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
- extra_css = """ h1{font-family: Georgia,serif}
- h2{color: #626262}
+ masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln341x47.gif'
+ extra_css = """
+ h1{font-family: Georgia,serif}
+ h2{color: #626262; font-weight: normal; font-size: 1.1em}
body{font-family: Arial,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
- .notaFecha{color: #808080}
+ .notaFecha{color: #808080; font-size: small}
.notaEpigrafe{font-size: x-small}
.topNota h1{font-family: Arial,sans-serif}
"""
@@ -37,47 +39,75 @@ class Lanacion(BasicNewsRecipe):
, 'language' : language
}
- keep_only_tags = [dict(name='div', attrs={'class':['nota floatFix','topNota','nota','post']})]
+ keep_only_tags = [
+ dict(name='div', attrs={'class':['topNota','itemHeader','nota','itemBody']})
+ ,dict(name='div', attrs={'id':'content'})
+ ]
+
remove_tags = [
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
,dict(name='ul' , attrs={'class':['cajaHerramientas cajaTop noprint','herramientas noprint']})
- ,dict(name='div' , attrs={'class':['cajaHerramientas noprint','cajaHerramientas floatFix'] })
- ,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
+ ,dict(name='div' , attrs={'class':['titulosMultimedia','herramientas noprint','cajaHerramientas noprint','cajaHerramientas floatFix'] })
+ ,dict(attrs={'class':['izquierda','espacio17','espacio10','espacio20','floatFix ultimasNoticias','relacionadas','titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
]
+
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
remove_attributes = ['height','width','visible','onclick','data-count','name']
feeds = [
- (u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' )
- ,(u'Politica' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=30' )
- ,(u'Economia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=272' )
- ,(u'Deportes' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=131' )
- ,(u'Informacion General' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=21' )
- ,(u'Cultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1' )
- ,(u'Opinion' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=28' )
- ,(u'Espectaculos' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=120' )
- ,(u'Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7' )
- ,(u'Ciencia&Salud' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=498' )
- ,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' )
- ,(u'Enfoques' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=421' )
- ,(u'Comercio Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=347' )
- ,(u'Tecnologia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=432' )
- ,(u'Arquitectura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=366' )
- ,(u'Turismo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=504' )
- ,(u'Al volante' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=371' )
- ,(u'El Campo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=337' )
- ,(u'Moda y Belleza' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1312' )
- ,(u'Inmuebles Comerciales', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1363' )
- ,(u'Countries' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1348' )
- ,(u'adnCultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6734' )
- ,(u'The Wall Street Journal Americas', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6373' )
- ,(u'Estilo de vida' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7353' )
- ,(u'Management' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7380' )
- ,(u'Bicentenario' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7276' )
+ (u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' )
+ ,(u'Deportes' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' )
+ ,(u'Economia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' )
+ ,(u'Informacion General' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21' )
+ ,(u'Cultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1' )
+ ,(u'Opinion' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=28' )
+ ,(u'Espectaculos' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=120' )
+ ,(u'Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7' )
+ ,(u'Ciencia&Salud' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=498' )
+ ,(u'Revista' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=494' )
+ ,(u'Enfoques' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=421' )
+ ,(u'Comercio Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=347' )
+ ,(u'Tecnologia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=432' )
+ ,(u'Arquitectura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=366' )
+ ,(u'Turismo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=504' )
+ ,(u'Al volante' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=371' )
+ ,(u'El Campo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=337' )
+ ,(u'Moda y Belleza' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1312')
+ ,(u'Inmuebles Comerciales', u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1363')
+ ,(u'Countries' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1348')
+ ,(u'adnCultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6734')
+ ,(u'The WSJ Americas' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6373')
+ ,(u'Comunidad' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1344')
+ ,(u'Management' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7380')
+ ,(u'Bicentenario' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7276')
]
+
+ def get_article_url(self, article):
+     # Picks the URL to download for a feed entry.
+     # Returns None when the base implementation yields no link or when the
+     # link contains 'galeria=' past its first character; returns the resolved
+     # URL for blogs.lanacion links that do not end with '/'; otherwise returns
+     # the link unchanged.
+     link = BasicNewsRecipe.get_article_url(self, article)
+     if not link:
+         # Without this guard the .startswith() call below raised on a
+         # missing link.
+         return None
+     if link.startswith('http://blogs.lanacion') and not link.endswith('/'):
+         # Ask the server for the final URL without recording a visit.
+         return self.browser.open_novisit(link).geturl()
+     if link.rfind('galeria=') > 0:
+         return None
+     return link
+
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
- return self.adeify_images(soup)
+ for item in soup.findAll('a'):
+ limg = item.find('img')
+ if item.string is not None:
+ str = item.string
+ item.replaceWith(str)
+ else:
+ if limg:
+ item.name = 'div'
+ item.attrs = []
+ else:
+ str = self.tag_to_string(item)
+ item.replaceWith(str)
+ for item in soup.findAll('img'):
+ if not item.has_key('alt'):
+ item['alt'] = 'image'
+ return soup
diff --git a/resources/recipes/ming_pao.recipe b/resources/recipes/ming_pao.recipe
index bbdbbf7ace..4a405a59dd 100644
--- a/resources/recipes/ming_pao.recipe
+++ b/resources/recipes/ming_pao.recipe
@@ -1,7 +1,20 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
+
+# Users of Kindle 3 (with limited system-level CJK support)
+# please replace the following "True" with "False".
+__MakePeriodical__ = True
+# Turn it to True if your device supports display of CJK titles
+__UseChineseTitle__ = False
+
+
'''
Change Log:
+2011/03/06: add new articles for finance section, also a new section "Columns"
+2011/02/28: rearrange the sections
+ [Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
+ View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
+ folder in Kindle 3
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
clean up the indentation
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
@@ -19,55 +32,58 @@ import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
-
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
class MPHKRecipe(BasicNewsRecipe):
- IsCJKWellSupported = True # Set to False to avoid generating periodical in which CJK characters can't be displayed in section/article view
- title = 'Ming Pao - Hong Kong'
- oldest_article = 1
- max_articles_per_feed = 100
- __author__ = 'Eddie Lau'
- description = ('Hong Kong Chinese Newspaper (http://news.mingpao.com). If'
- 'you are using a Kindle with firmware < 3.1, customize the'
- 'recipe')
- publisher = 'MingPao'
- category = 'Chinese, News, Hong Kong'
- remove_javascript = True
- use_embedded_content = False
- no_stylesheets = True
- language = 'zh'
- encoding = 'Big5-HKSCS'
- recursions = 0
- conversion_options = {'linearize_tables':True}
- timefmt = ''
- extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
- masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
- keep_only_tags = [dict(name='h1'),
+ title = 'Ming Pao - Hong Kong'
+ oldest_article = 1
+ max_articles_per_feed = 100
+ __author__ = 'Eddie Lau'
+ description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
+ publisher = 'MingPao'
+ category = 'Chinese, News, Hong Kong'
+ remove_javascript = True
+ use_embedded_content = False
+ no_stylesheets = True
+ language = 'zh'
+ encoding = 'Big5-HKSCS'
+ recursions = 0
+ conversion_options = {'linearize_tables':True}
+ timefmt = ''
+ extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
+ masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
+ keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
- dict(attrs={'id':['newscontent']}), # entertainment page content
+ dict(name='font', attrs={'color':['AA0000']}), # for column articles title
+ dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']})
]
- remove_tags = [dict(name='style'),
- dict(attrs={'id':['newscontent135']})] # for the finance page
- remove_attributes = ['width']
- preprocess_regexps = [
+ remove_tags = [dict(name='style'),
+ dict(attrs={'id':['newscontent135']}), # for the finance page
+ dict(name='table')] # for content fetched from life.mingpao.com
+ remove_attributes = ['width']
+ preprocess_regexps = [
(re.compile(r'', re.DOTALL|re.IGNORECASE),
lambda match: ''),
(re.compile(r' ', re.DOTALL|re.IGNORECASE),
lambda match: ''),
(re.compile(r'
', re.DOTALL|re.IGNORECASE), # for entertainment page
- lambda match: '')
+ lambda match: ''),
+ # skip after title in life.mingpao.com fetched article
+ (re.compile(r"", re.DOTALL|re.IGNORECASE),
+ lambda match: "
"),
+ (re.compile(r"
", re.DOTALL|re.IGNORECASE),
+ lambda match: "")
]
- def image_url_processor(cls, baseurl, url):
- # trick: break the url at the first occurance of digit, add an additional
- # '_' at the front
- # not working, may need to move this to preprocess_html() method
+ def image_url_processor(cls, baseurl, url):
+ # trick: break the url at the first occurance of digit, add an additional
+ # '_' at the front
+ # not working, may need to move this to preprocess_html() method
# minIdx = 10000
# i0 = url.find('0')
# if i0 >= 0 and i0 < minIdx:
@@ -99,253 +115,314 @@ class MPHKRecipe(BasicNewsRecipe):
# i9 = url.find('9')
# if i9 >= 0 and i9 < minIdx:
# minIdx = i9
- return url
+ return url
- def get_dtlocal(self):
- dt_utc = datetime.datetime.utcnow()
- # convert UTC to local hk time - at around HKT 6.00am, all news are available
- dt_local = dt_utc - datetime.timedelta(-2.0/24)
- return dt_local
+ def get_dtlocal(self):
+ # Return the reference datetime from which the fetch date strings are derived.
+ dt_utc = datetime.datetime.utcnow()
+ # Not a true UTC -> HKT (UTC+8) conversion: subtracting a negative timedelta
+ # of 2/24 day moves the clock 2 hours ahead of UTC, so the calendar date
+ # rolls over at 22:00 UTC, i.e. around HKT 6.00am, when all news are available
+ dt_local = dt_utc - datetime.timedelta(-2.0/24)
+ return dt_local
- def get_fetchdate(self):
- return self.get_dtlocal().strftime("%Y%m%d")
+ def get_fetchdate(self):
+ # Issue date as a compact 'YYYYMMDD' string, taken from get_dtlocal().
+ return self.get_dtlocal().strftime("%Y%m%d")
- def get_fetchformatteddate(self):
- return self.get_dtlocal().strftime("%Y-%m-%d")
+ def get_fetchformatteddate(self):
+ # Issue date as a dashed 'YYYY-MM-DD' string, taken from get_dtlocal().
+ return self.get_dtlocal().strftime("%Y-%m-%d")
- def get_fetchday(self):
- # convert UTC to local hk time - at around HKT 6.00am, all news are available
- return self.get_dtlocal().strftime("%d")
+ def get_fetchday(self):
+ # Zero-padded day of month ('DD') of the get_dtlocal() reference time; the time shift itself happens in get_dtlocal()
+ return self.get_dtlocal().strftime("%d")
- def get_cover_url(self):
- cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
- br = BasicNewsRecipe.get_browser()
- try:
- br.open(cover)
- except:
- cover = None
- return cover
+ def get_cover_url(self):
+     '''
+     Build the URL of the front-page image for the current issue and probe it.
+     Returns the URL string, or None when the image cannot be opened.
+     '''
+     # Take one timestamp so the date and day parts of the URL can never
+     # disagree if this call straddles the date rollover.
+     dt = self.get_dtlocal()
+     datestr = dt.strftime("%Y%m%d")
+     cover = 'http://news.mingpao.com/' + datestr + '/' + datestr + '_' + dt.strftime("%d") + 'gacov.jpg'
+     br = BasicNewsRecipe.get_browser()
+     try:
+         br.open(cover)
+     except Exception:
+         # Best effort: any fetch failure simply means "no cover", but unlike a
+         # bare except this does not swallow KeyboardInterrupt/SystemExit.
+         cover = None
+     return cover
- def parse_index(self):
- feeds = []
- dateStr = self.get_fetchdate()
- for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
- (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
- (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
- (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
+ def parse_index(self):
+ feeds = []
+ dateStr = self.get_fetchdate()
+
+ for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
+ (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
+ (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
+
+ # special- editorial
+ ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
+ if ed_articles:
+ feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
+
+ for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
- (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
- ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
- (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
- (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
- (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
+ (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
+
+ # special - finance
+ #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
+ fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
+ if fin_articles:
+ feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+
+ for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+ (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
+
+ # special - entertainment
+ ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
+ if ent_articles:
+ feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+
+ for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
- articles = self.parse_section(url)
- if articles:
- feeds.append((title, articles))
- # special - finance
- fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
- if fin_articles:
- feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
- # special - entertainment
- ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
- if ent_articles:
- feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
- return feeds
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
- def parse_section(self, url):
- dateStr = self.get_fetchdate()
- soup = self.index_to_soup(url)
- divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
- current_articles = []
- included_urls = []
- divs.reverse()
- for i in divs:
- a = i.find('a', href = True)
- title = self.tag_to_string(a)
- url = a.get('href', False)
- url = 'http://news.mingpao.com/' + dateStr + '/' +url
- if url not in included_urls and url.rfind('Redirect') == -1:
- current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
- included_urls.append(url)
- current_articles.reverse()
- return current_articles
- def parse_fin_section(self, url):
- dateStr = self.get_fetchdate()
- soup = self.index_to_soup(url)
- a = soup.findAll('a', href= True)
- current_articles = []
- included_urls = []
- for i in a:
- url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
- if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
- title = self.tag_to_string(i)
- current_articles.append({'title': title, 'url': url, 'description':''})
- included_urls.append(url)
- return current_articles
+ # special- columns
+ col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
+ if col_articles:
+ feeds.append((u'\u5c08\u6b04 Columns', col_articles))
- def parse_ent_section(self, url):
- self.get_fetchdate()
- soup = self.index_to_soup(url)
- a = soup.findAll('a', href=True)
- a.reverse()
- current_articles = []
- included_urls = []
- for i in a:
- title = self.tag_to_string(i)
- url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
- if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
- current_articles.append({'title': title, 'url': url, 'description': ''})
- included_urls.append(url)
- current_articles.reverse()
- return current_articles
+ return feeds
- def preprocess_html(self, soup):
- for item in soup.findAll(style=True):
- del item['style']
- for item in soup.findAll(style=True):
- del item['width']
- for item in soup.findAll(stype=True):
- del item['absmiddle']
- return soup
+ def parse_section(self, url):
+     '''
+     Collect the article links of one news.mingpao.com section index page.
+     url: address of the section index.
+     Returns a list of dicts with 'title', 'url', 'description' and 'date'
+     keys, in page order, with duplicate and 'Redirect' links left out.
+     '''
+     dateStr = self.get_fetchdate()
+     soup = self.index_to_soup(url)
+     divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
+     current_articles = []
+     included_urls = set()
+     # Walk the page bottom-up so that, for a URL listed more than once, the
+     # entry nearest the end of the page is the one kept; page order is
+     # restored by the reverse() below.
+     for i in reversed(divs):
+         a = i.find('a', href = True)
+         if a is None:
+             # bullet without a link: nothing to fetch
+             continue
+         title = self.tag_to_string(a)
+         url = 'http://news.mingpao.com/' + dateStr + '/' + a.get('href', False)
+         if url not in included_urls and url.rfind('Redirect') == -1:
+             current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
+             included_urls.add(url)
+     current_articles.reverse()
+     return current_articles
- def create_opf(self, feeds, dir=None):
- if dir is None:
- dir = self.output_dir
- if self.IsCJKWellSupported == True:
- # use Chinese title
- title = u'\u660e\u5831 (\u9999\u6e2f) ' + self.get_fetchformatteddate()
- else:
- # use English title
- title = self.short_title() + ' ' + self.get_fetchformatteddate()
- if True: # force date in title
- # title += strftime(self.timefmt)
- mi = MetaInformation(title, [self.publisher])
- mi.publisher = self.publisher
- mi.author_sort = self.publisher
- if self.IsCJKWellSupported == True:
- mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
- else:
- mi.publication_type = self.publication_type+':'+self.short_title()
- #mi.timestamp = nowf()
- mi.timestamp = self.get_dtlocal()
- mi.comments = self.description
- if not isinstance(mi.comments, unicode):
- mi.comments = mi.comments.decode('utf-8', 'replace')
- #mi.pubdate = nowf()
- mi.pubdate = self.get_dtlocal()
- opf_path = os.path.join(dir, 'index.opf')
- ncx_path = os.path.join(dir, 'index.ncx')
- opf = OPFCreator(dir, mi)
- # Add mastheadImage entry to
section
- mp = getattr(self, 'masthead_path', None)
- if mp is not None and os.access(mp, os.R_OK):
- from calibre.ebooks.metadata.opf2 import Guide
- ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
- ref.type = 'masthead'
- ref.title = 'Masthead Image'
- opf.guide.append(ref)
+ def parse_ed_section(self, url):
+     '''
+     Collect the editorial article links from a life.mingpao.com index page.
+     url: address of the index page.
+     Returns a list of dicts with 'title', 'url' and 'description' keys, in
+     page order; only links containing both '.txt' and 'nal' are kept.
+     '''
+     soup = self.index_to_soup(url)
+     current_articles = []
+     included_urls = set()
+     # Walk the links bottom-up so that, for a URL listed more than once, the
+     # last occurrence on the page wins; page order is restored below.
+     for i in reversed(soup.findAll('a', href=True)):
+         title = self.tag_to_string(i)
+         url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+         if url not in included_urls and '.txt' in url and 'nal' in url:
+             current_articles.append({'title': title, 'url': url, 'description': ''})
+             included_urls.add(url)
+     current_articles.reverse()
+     return current_articles
- manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
- manifest.append(os.path.join(dir, 'index.html'))
- manifest.append(os.path.join(dir, 'index.ncx'))
+ def parse_fin_section(self, url):
+     '''
+     Collect the finance article links from a life.mingpao.com index page.
+     url: address of the index page.
+     Returns a list of dicts with 'title', 'url' and 'description' keys, in
+     page order; only links containing both 'txt' and 'nal' are kept, and
+     the first occurrence of a repeated URL wins.
+     '''
+     soup = self.index_to_soup(url)
+     current_articles = []
+     included_urls = set()
+     for i in soup.findAll('a', href= True):
+         # previous source: 'http://www.mpfinance.com/cfm/' + href, filtered by
+         # the fetch date and by the absence of 'index'
+         url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+         if url not in included_urls and 'txt' in url and 'nal' in url:
+             title = self.tag_to_string(i)
+             current_articles.append({'title': title, 'url': url, 'description':''})
+             included_urls.add(url)
+     return current_articles
- # Get cover
- cpath = getattr(self, 'cover_path', None)
- if cpath is None:
- pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
- if self.default_cover(pf):
- cpath = pf.name
- if cpath is not None and os.access(cpath, os.R_OK):
- opf.cover = cpath
- manifest.append(cpath)
+ def parse_ent_section(self, url):
+     '''
+     Collect the entertainment article links from an ol.mingpao.com index page.
+     url: address of the index page.
+     Returns a list of dicts with 'title', 'url' and 'description' keys, in
+     page order; only links containing both '.txt' and 'star' are kept.
+     '''
+     soup = self.index_to_soup(url)
+     current_articles = []
+     included_urls = set()
+     # Walk the links bottom-up so that, for a URL listed more than once, the
+     # last occurrence on the page wins; page order is restored below.
+     for i in reversed(soup.findAll('a', href=True)):
+         title = self.tag_to_string(i)
+         url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
+         if url not in included_urls and '.txt' in url and 'star' in url:
+             current_articles.append({'title': title, 'url': url, 'description': ''})
+             included_urls.add(url)
+     current_articles.reverse()
+     return current_articles
- # Get masthead
- mpath = getattr(self, 'masthead_path', None)
- if mpath is not None and os.access(mpath, os.R_OK):
- manifest.append(mpath)
+ def parse_col_section(self, url):
+     '''
+     Collect the column article links from a life.mingpao.com index page.
+     url: address of the index page.
+     Returns a list of dicts with 'title', 'url' and 'description' keys, in
+     page order; only links containing both '.txt' and 'ncl' are kept.
+     '''
+     soup = self.index_to_soup(url)
+     current_articles = []
+     included_urls = set()
+     # Walk the links bottom-up so that, for a URL listed more than once, the
+     # last occurrence on the page wins; page order is restored below.
+     for i in reversed(soup.findAll('a', href=True)):
+         title = self.tag_to_string(i)
+         url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+         if url not in included_urls and '.txt' in url and 'ncl' in url:
+             current_articles.append({'title': title, 'url': url, 'description': ''})
+             included_urls.add(url)
+     current_articles.reverse()
+     return current_articles
- opf.create_manifest_from_files_in(manifest)
- for mani in opf.manifest:
- if mani.path.endswith('.ncx'):
- mani.id = 'ncx'
- if mani.path.endswith('mastheadImage.jpg'):
- mani.id = 'masthead-image'
- entries = ['index.html']
- toc = TOC(base_path=dir)
- self.play_order_counter = 0
- self.play_order_map = {}
+ def preprocess_html(self, soup):
+     '''
+     Strip presentational attributes from a parsed article page.
+     soup: the parsed page; it is modified in place and returned.
+     '''
+     for item in soup.findAll(style=True):
+         del item['style']
+     # This loop used to search for style=True again, which can never match
+     # once the loop above has run, so width attributes were never removed.
+     for item in soup.findAll(width=True):
+         del item['width']
+     # This loop used to search for a 'stype' attribute and delete an
+     # 'absmiddle' attribute; 'absmiddle' is a value of the align attribute.
+     # NOTE(review): assumed intent is to drop align="absmiddle" - confirm.
+     for item in soup.findAll(align='absmiddle'):
+         del item['align']
+     return soup
- def feed_index(num, parent):
- f = feeds[num]
- for j, a in enumerate(f):
- if getattr(a, 'downloaded', False):
- adir = 'feed_%d/article_%d/'%(num, j)
- auth = a.author
- if not auth:
- auth = None
- desc = a.text_summary
- if not desc:
- desc = None
- else:
- desc = self.description_limiter(desc)
- entries.append('%sindex.html'%adir)
- po = self.play_order_map.get(entries[-1], None)
- if po is None:
- self.play_order_counter += 1
- po = self.play_order_counter
- parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
+ def create_opf(self, feeds, dir=None):
+ if dir is None:
+ dir = self.output_dir
+ if __UseChineseTitle__ == True:
+ title = u'\u660e\u5831 (\u9999\u6e2f)'
+ else:
+ title = self.short_title()
+ # if not generating a periodical, force date to apply in title
+ if __MakePeriodical__ == False:
+ title = title + ' ' + self.get_fetchformatteddate()
+ if True:
+ mi = MetaInformation(title, [self.publisher])
+ mi.publisher = self.publisher
+ mi.author_sort = self.publisher
+ if __MakePeriodical__ == True:
+ mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+ else:
+ mi.publication_type = self.publication_type+':'+self.short_title()
+ #mi.timestamp = nowf()
+ mi.timestamp = self.get_dtlocal()
+ mi.comments = self.description
+ if not isinstance(mi.comments, unicode):
+ mi.comments = mi.comments.decode('utf-8', 'replace')
+ #mi.pubdate = nowf()
+ mi.pubdate = self.get_dtlocal()
+ opf_path = os.path.join(dir, 'index.opf')
+ ncx_path = os.path.join(dir, 'index.ncx')
+ opf = OPFCreator(dir, mi)
+ # Add mastheadImage entry to section
+ mp = getattr(self, 'masthead_path', None)
+ if mp is not None and os.access(mp, os.R_OK):
+ from calibre.ebooks.metadata.opf2 import Guide
+ ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
+ ref.type = 'masthead'
+ ref.title = 'Masthead Image'
+ opf.guide.append(ref)
+
+ manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+ manifest.append(os.path.join(dir, 'index.html'))
+ manifest.append(os.path.join(dir, 'index.ncx'))
+
+ # Get cover
+ cpath = getattr(self, 'cover_path', None)
+ if cpath is None:
+ pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+ if self.default_cover(pf):
+ cpath = pf.name
+ if cpath is not None and os.access(cpath, os.R_OK):
+ opf.cover = cpath
+ manifest.append(cpath)
+
+ # Get masthead
+ mpath = getattr(self, 'masthead_path', None)
+ if mpath is not None and os.access(mpath, os.R_OK):
+ manifest.append(mpath)
+
+ opf.create_manifest_from_files_in(manifest)
+ for mani in opf.manifest:
+ if mani.path.endswith('.ncx'):
+ mani.id = 'ncx'
+ if mani.path.endswith('mastheadImage.jpg'):
+ mani.id = 'masthead-image'
+ entries = ['index.html']
+ toc = TOC(base_path=dir)
+ self.play_order_counter = 0
+ self.play_order_map = {}
+
+ def feed_index(num, parent):
+ f = feeds[num]
+ for j, a in enumerate(f):
+ if getattr(a, 'downloaded', False):
+ adir = 'feed_%d/article_%d/'%(num, j)
+ auth = a.author
+ if not auth:
+ auth = None
+ desc = a.text_summary
+ if not desc:
+ desc = None
+ else:
+ desc = self.description_limiter(desc)
+ entries.append('%sindex.html'%adir)
+ po = self.play_order_map.get(entries[-1], None)
+ if po is None:
+ self.play_order_counter += 1
+ po = self.play_order_counter
+ parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
- last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
- for sp in a.sub_pages:
- prefix = os.path.commonprefix([opf_path, sp])
- relp = sp[len(prefix):]
- entries.append(relp.replace(os.sep, '/'))
- last = sp
+ last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
+ for sp in a.sub_pages:
+ prefix = os.path.commonprefix([opf_path, sp])
+ relp = sp[len(prefix):]
+ entries.append(relp.replace(os.sep, '/'))
+ last = sp
- if os.path.exists(last):
- with open(last, 'rb') as fi:
- src = fi.read().decode('utf-8')
- soup = BeautifulSoup(src)
- body = soup.find('body')
- if body is not None:
- prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
- templ = self.navbar.generate(True, num, j, len(f),
+ if os.path.exists(last):
+ with open(last, 'rb') as fi:
+ src = fi.read().decode('utf-8')
+ soup = BeautifulSoup(src)
+ body = soup.find('body')
+ if body is not None:
+ prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
+ templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
center=self.center_navbar)
- elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
- body.insert(len(body.contents), elem)
- with open(last, 'wb') as fi:
- fi.write(unicode(soup).encode('utf-8'))
- if len(feeds) == 0:
- raise Exception('All feeds are empty, aborting.')
+ elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
+ body.insert(len(body.contents), elem)
+ with open(last, 'wb') as fi:
+ fi.write(unicode(soup).encode('utf-8'))
+ if len(feeds) == 0:
+ raise Exception('All feeds are empty, aborting.')
- if len(feeds) > 1:
- for i, f in enumerate(feeds):
- entries.append('feed_%d/index.html'%i)
- po = self.play_order_map.get(entries[-1], None)
- if po is None:
- self.play_order_counter += 1
- po = self.play_order_counter
- auth = getattr(f, 'author', None)
- if not auth:
- auth = None
- desc = getattr(f, 'description', None)
- if not desc:
- desc = None
- feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+ if len(feeds) > 1:
+ for i, f in enumerate(feeds):
+ entries.append('feed_%d/index.html'%i)
+ po = self.play_order_map.get(entries[-1], None)
+ if po is None:
+ self.play_order_counter += 1
+ po = self.play_order_counter
+ auth = getattr(f, 'author', None)
+ if not auth:
+ auth = None
+ desc = getattr(f, 'description', None)
+ if not desc:
+ desc = None
+ feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
- else:
- entries.append('feed_%d/index.html'%0)
- feed_index(0, toc)
+ else:
+ entries.append('feed_%d/index.html'%0)
+ feed_index(0, toc)
- for i, p in enumerate(entries):
- entries[i] = os.path.join(dir, p.replace('/', os.sep))
- opf.create_spine(entries)
- opf.set_toc(toc)
+ for i, p in enumerate(entries):
+ entries[i] = os.path.join(dir, p.replace('/', os.sep))
+ opf.create_spine(entries)
+ opf.set_toc(toc)
- with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
- opf.render(opf_file, ncx_file)
+ with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
+ opf.render(opf_file, ncx_file)
diff --git a/resources/recipes/monden.recipe b/resources/recipes/monden.recipe
new file mode 100644
index 0000000000..22764ffe47
--- /dev/null
+++ b/resources/recipes/monden.recipe
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+monden.info
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Monden(BasicNewsRecipe):
+ # Recipe for monden.info (language 'ro'), built from the site's RSS feed.
+ title = u'Monden'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = u'Arti\u015fti, interviuri, concerte.. MUZIC\u0102'
+ publisher = u'Monden'
+ oldest_article = 25
+ language = 'ro'
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ category = 'Ziare,Stiri,Muzica'
+ encoding = 'utf-8'
+ cover_url = 'http://www.monden.info/wp-content/uploads/2009/04/mondeninfo-logo.jpg'
+
+ # Reuse the recipe's own metadata values as conversion options.
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+
+ # Font family / size rules for the generated pages.
+ extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
+
+ # Article content selector: the div with id 'content'.
+ keep_only_tags = [
+ dict(name='div', attrs={'id':'content'})
+ ]
+
+ # Divs of class 'postAuthor' and 'postLike' are removed.
+ remove_tags = [
+ dict(name='div', attrs={'class':['postAuthor']})
+ , dict(name='div', attrs={'class':['postLike']})
+ ]
+
+ # The 'postLike' div also marks the cut-off point for trailing content.
+ remove_tags_after = [
+ dict(name='div', attrs={'class':['postLike']})
+ ]
+
+ feeds = [
+ (u'Feeds', u'http://www.monden.info/feed/')
+ ]
+
+ def preprocess_html(self, soup):
+ # Delegate to the inherited adeify_images() helper and return its result.
+ return self.adeify_images(soup)
diff --git a/resources/recipes/nationalgeoro.recipe b/resources/recipes/nationalgeoro.recipe
index a3c5727d38..8f989be74d 100644
--- a/resources/recipes/nationalgeoro.recipe
+++ b/resources/recipes/nationalgeoro.recipe
@@ -14,7 +14,7 @@ class NationalGeoRo(BasicNewsRecipe):
__author__ = u'Silviu Cotoar\u0103'
description = u'S\u0103 avem grij\u0103 de planet\u0103'
publisher = 'National Geographic'
- oldest_article = 5
+ oldest_article = 35
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
diff --git a/resources/recipes/nbonline.recipe b/resources/recipes/nbonline.recipe
new file mode 100644
index 0000000000..c5a06edec7
--- /dev/null
+++ b/resources/recipes/nbonline.recipe
@@ -0,0 +1,33 @@
+EMAILADDRESS = 'hoge@foobar.co.jp'
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class NBOnline(BasicNewsRecipe):
+    # Recipe for Nikkei Business Online (subscription required). Login uses the
+    # username/password set on the recipe plus the module-level EMAILADDRESS.
+    title = u'Nikkei Business Online'
+    language = 'ja'
+    description = u'Nikkei Business Online New articles. PLEASE NOTE: You need to edit EMAILADDRESS line of this "nbonline.recipe" file to set your e-mail address which is needed when login. (file is in "Calibre2/resources/recipes" directory.)'
+    __author__ = 'Ado Nishimura'
+    needs_subscription = True
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_tags_before = dict(id='kanban')
+    remove_tags = [dict(name='div', id='footer')]
+
+    # Feed title spelling corrected (was "Buisiness").
+    feeds = [('Nikkei Business Online', 'http://business.nikkeibp.co.jp/rss/all_nbo.rdf')]
+
+    def get_cover_url(self):
+        '''Return the fixed URL of the site logo, used as the cover image.'''
+        return 'http://business.nikkeibp.co.jp/images/nbo/200804/parts/logo.gif'
+
+    def get_browser(self):
+        '''
+        Return a browser, logged in to signon.nikkeibp.co.jp when both a
+        username and a password are set; otherwise the browser is returned as is.
+        '''
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None and self.password is not None:
+            br.open('https://signon.nikkeibp.co.jp/front/login/?ct=p&ts=nbo')
+            br.select_form(name='loginActionForm')
+            # The login form takes an e-mail address in addition to the user id.
+            br['email'] = EMAILADDRESS
+            br['userId'] = self.username
+            br['password'] = self.password
+            br.submit()
+        return br
+
+    def print_version(self, url):
+        '''Return the article URL with the '?ST=print' query string appended.'''
+        return url + '?ST=print'
diff --git a/resources/recipes/nrc-nl-epub.recipe b/resources/recipes/nrc-nl-epub.recipe
index da9b9195ce..2d190e4d0a 100644
--- a/resources/recipes/nrc-nl-epub.recipe
+++ b/resources/recipes/nrc-nl-epub.recipe
@@ -1,14 +1,14 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
# -*- coding: utf-8 -*-
-#Based on Lars Jacob's Taz Digiabo recipe
+#Based on veezh's original recipe and Kovid Goyal's New York Times recipe
__license__ = 'GPL v3'
-__copyright__ = '2010, veezh'
+__copyright__ = '2011, Snaab'
'''
www.nrc.nl
'''
-import os, urllib2, zipfile
+import os, zipfile
import time
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
@@ -17,41 +17,59 @@ from calibre.ptempfile import PersistentTemporaryFile
class NRCHandelsblad(BasicNewsRecipe):
title = u'NRC Handelsblad'
- description = u'De EPUB-versie van NRC'
+ description = u'De ePaper-versie van NRC'
language = 'nl'
lang = 'nl-NL'
+ needs_subscription = True
- __author__ = 'veezh'
+ __author__ = 'Snaab'
conversion_options = {
'no_default_epub_cover' : True
}
+ def get_browser(self):
+ # Return a browser; when both username and password are set, first submit
+ # them through the first form on the login.nrc.nl login page.
+ br = BasicNewsRecipe.get_browser()
+ if self.username is not None and self.password is not None:
+ br.open('http://login.nrc.nl/login')
+ # nr=0 selects the first form on the page
+ br.select_form(nr=0)
+ br['username'] = self.username
+ br['password'] = self.password
+ br.submit()
+ return br
+
def build_index(self):
+
today = time.strftime("%Y%m%d")
+
domain = "http://digitaleeditie.nrc.nl"
url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub"
-# print url
+ #print url
try:
- f = urllib2.urlopen(url)
- except urllib2.HTTPError:
+ br = self.get_browser()
+ f = br.open(url)
+ except:
self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
raise ValueError('Krant van vandaag nog niet beschikbaar')
+
tmp = PersistentTemporaryFile(suffix='.epub')
self.report_progress(0,_('downloading epub'))
tmp.write(f.read())
- tmp.close()
-
- zfile = zipfile.ZipFile(tmp.name, 'r')
- self.report_progress(0,_('extracting epub'))
-
- zfile.extractall(self.output_dir)
+ f.close()
+ br.close()
+ if zipfile.is_zipfile(tmp):
+ try:
+ zfile = zipfile.ZipFile(tmp.name, 'r')
+ zfile.extractall(self.output_dir)
+ self.report_progress(0,_('extracting epub'))
+ except zipfile.BadZipfile:
+ self.report_progress(0,_('BadZip error, continuing'))
tmp.close()
- index = os.path.join(self.output_dir, 'content.opf')
+ index = os.path.join(self.output_dir, 'metadata.opf')
self.report_progress(1,_('epub downloaded and extracted'))
diff --git a/resources/recipes/oakland_north.recipe b/resources/recipes/oakland_north.recipe
new file mode 100644
index 0000000000..0ad165be40
--- /dev/null
+++ b/resources/recipes/oakland_north.recipe
@@ -0,0 +1,23 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class AdvancedUserRecipe1299640653(BasicNewsRecipe):
+ # Recipe for Oakland North, built from the oaklandnorth.net feed.
+ title = u'Oakland North'
+ oldest_article = 30
+ max_articles_per_feed = 100
+
+ language = 'en'
+ __author__ = 'noah'
+ description = 'Oakland North'
+ category = 'news'
+ no_stylesheets = True
+
+ masthead_url = 'http://oaklandnorth.net/wp-content/themes/oaklandnorth/images/masthead.png'
+
+ # Match divs whose class contains the whole word 'post' not directly followed
+ # by '-' (so names such as 'post-postscript' do not match on that word).
+ keep_only_tags = [dict(name='div', attrs={'class':re.compile(r'\bpost\b(?!-)', re.IGNORECASE)})]
+
+ # The 'post-postscript' paragraph is both the cut-off point and removed itself.
+ remove_tags_after = [dict(name='p', attrs={'class':'post-postscript'})]
+
+ remove_tags = [dict(name='p', attrs={'class':'post-postscript'})]
+
+ feeds = [(u'All Headlines', u'http://oaklandnorth.net/feed/')]
diff --git a/resources/recipes/onemagazine.recipe b/resources/recipes/onemagazine.recipe
new file mode 100644
index 0000000000..73dd50141a
--- /dev/null
+++ b/resources/recipes/onemagazine.recipe
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+onemagazine.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Onemagazine(BasicNewsRecipe):
+ # Recipe for onemagazine.ro (language 'ro'), built from the site's RSS feed.
+ title = u'The ONE'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = u'Be the ONE, not anyone ..'
+ publisher = u'The ONE'
+ oldest_article = 25
+ language = 'ro'
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ category = 'Ziare,Reviste,Femei'
+ encoding = 'utf-8'
+ cover_url = 'http://www.onemagazine.ro/images/logo_rss.jpg'
+
+ # Reuse the recipe's own metadata values as conversion options.
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+
+ # Font family / size rules for the generated pages.
+ extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
+
+ # Article content selectors: the 'article' div, the gallery div and
+ # justified divs.
+ keep_only_tags = [
+ dict(name='div', attrs={'class':'article'})
+ , dict(name='div', attrs={'class':'gallery clearfix'})
+ , dict(name='div', attrs={'align':'justify'})
+ ]
+
+ # Info line, connect widget, photo spans, counters and carousels are removed.
+ remove_tags = [
+ dict(name='p', attrs={'class':['info']})
+ , dict(name='table', attrs={'class':['connect_widget_interactive_area']})
+ , dict(name='span', attrs={'class':['photo']})
+ , dict(name='div', attrs={'class':['counter']})
+ , dict(name='div', attrs={'class':['carousel']})
+ , dict(name='div', attrs={'class':['jcarousel-container jcarousel-container-horizontal']})
+ ]
+
+ # The connect widget table also marks the cut-off point for trailing content.
+ remove_tags_after = [
+ dict(name='table', attrs={'class':['connect_widget_interactive_area']})
+ ]
+
+ feeds = [
+ (u'Feeds', u'http://www.onemagazine.ro/rss')
+ ]
+
+ def preprocess_html(self, soup):
+ # Delegate to the inherited adeify_images() helper and return its result.
+ return self.adeify_images(soup)
diff --git a/resources/recipes/pcworldro.recipe b/resources/recipes/pcworldro.recipe
new file mode 100644
index 0000000000..89ddbaf21d
--- /dev/null
+++ b/resources/recipes/pcworldro.recipe
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+pcworld.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Pcworld(BasicNewsRecipe):
+ # Recipe for pcworld.ro (language 'ro'), built from the site's RSS feed.
+ title = u'PC World'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = u'IT'
+ publisher = u'PC World'
+ oldest_article = 25
+ language = 'ro'
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ category = 'Ziare,Stiri,IT'
+ encoding = 'utf-8'
+ cover_url = 'http://www.pcworld.ro/img/ui/header-logo.gif'
+
+ # Reuse the recipe's own metadata values as conversion options.
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+
+ # Font family / size rules for the generated pages.
+ extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
+
+ # Article content selectors: the 'content_page' div and the content body box.
+ keep_only_tags = [
+ dict(name='div', attrs={'id':'content_page'})
+ , dict(name='div', attrs={'class':'box_center content_body'})
+ ]
+
+ # Breadcrumb heading and the 'voteaza' box are removed.
+ remove_tags = [
+ dict(name='h3', attrs={'class':['breadcrumb']})
+ , dict(name='div', attrs={'class':['box_center voteaza']})
+ ]
+
+ # The 'voteaza' box also marks the cut-off point for trailing content.
+ remove_tags_after = [
+ dict(name='div', attrs={'class':['box_center voteaza']})
+ ]
+
+ feeds = [
+ (u'Feeds', u'http://www.pcworld.ro/contents/pcworld.rss')
+ ]
+
+ def preprocess_html(self, soup):
+ # Delegate to the inherited adeify_images() helper and return its result.
+ return self.adeify_images(soup)
diff --git a/resources/recipes/promotor.recipe b/resources/recipes/promotor.recipe
new file mode 100644
index 0000000000..11a8499d7b
--- /dev/null
+++ b/resources/recipes/promotor.recipe
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+promotor.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Promotor(BasicNewsRecipe):
+    """calibre news recipe for promotor.ro, configured through class attributes."""
+    title = u'Promotor'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'Auto-moto'
+    publisher = u'Promotor'
+    oldest_article = 25
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Reviste,TV,Auto'
+    encoding = 'utf-8'
+    cover_url = 'http://www.promotor.ro/images/logo_promotor.gif'
+
+    conversion_options = {  # reuses the attributes defined above
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+    extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''  # the CSS string body is kept verbatim, including its one-space indent
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'casetatitluarticol'}),
+        dict(name='div', attrs={'style':'width: 273px; height: 210px; overflow: hidden; margin: 0pt auto;'}),
+        dict(name='div', attrs={'class':'textb'}),
+        dict(name='div', attrs={'class':'contentarticol'}),
+    ]
+
+    remove_tags = [
+        dict(name='td', attrs={'class':['connect_widget_vertical_center connect_widget_button_cell']}),
+        dict(name='div', attrs={'class':['etichetagry']}),
+        dict(name='span', attrs={'class':['textb']}),
+    ]
+
+    remove_tags_after = [
+        dict(name='div', attrs={'class':['etichetagry']}),
+        dict(name='span', attrs={'class':['textb']}),
+    ]
+
+    feeds = [(u'Feeds', u'http://www.promotor.ro/rss')]
+
+    def preprocess_html(self, soup):
+        """Return the parsed page after passing it through self.adeify_images()."""
+        return self.adeify_images(soup)
diff --git a/resources/recipes/protvmagazin.recipe b/resources/recipes/protvmagazin.recipe
new file mode 100644
index 0000000000..fbdb7465fc
--- /dev/null
+++ b/resources/recipes/protvmagazin.recipe
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+protvmagazin.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Protvmagazin(BasicNewsRecipe):
+    """calibre news recipe for protvmagazin.ro, configured through class attributes."""
+    title = u'ProTv Magazin'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'Ghid TV'
+    publisher = u'ProTv Magazin'
+    oldest_article = 25
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Reviste,TV'
+    encoding = 'utf-8'
+    cover_url = 'http://www.protvmagazin.ro/images/logo.png'
+
+    conversion_options = {  # reuses the attributes defined above
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''  # the CSS string body is kept verbatim, including its one-space indent
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'box gradient'}),
+    ]
+
+    remove_tags = [
+        dict(name='p', attrs={'class':['title']}),
+        dict(name='div', attrs={'id':['online_only']}),
+        dict(name='div', attrs={'class':['show_article_rating']}),
+        dict(name='ul', attrs={'class':['breadcrumbs']}),
+        dict(name='p', attrs={'class':['tags']}),
+    ]
+
+    remove_tags_after = [
+        dict(name='table', attrs={'class':['connect_widget_interactive_area']}),
+        dict(name='p', attrs={'class':['tags']}),
+        dict(name='div', attrs={'class':['connect_widget_sample_connections clearfix']}),  # 'div': 'dev' is not an HTML element name
+    ]
+
+    feeds = [(u'Feeds', u'http://www.protvmagazin.ro/rss/articole-noi')]
+
+    def preprocess_html(self, soup):
+        """Return the parsed page after passing it through self.adeify_images()."""
+        return self.adeify_images(soup)
diff --git a/resources/recipes/psychologies.recipe b/resources/recipes/psychologies.recipe
new file mode 100644
index 0000000000..1b69a2ed9e
--- /dev/null
+++ b/resources/recipes/psychologies.recipe
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+psychologies.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Psychologies(BasicNewsRecipe):
+    """calibre news recipe for psychologies.ro, configured through class attributes."""
+    title = u'Psychologies'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'Psihologie \u015fi Dezvoltare Personal\u0103..'
+    publisher = u'Psychologies'
+    oldest_article = 25
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Reviste,Psihologie'
+    encoding = 'utf-8'
+    cover_url = 'http://www.psychologies.ro/images/default/logo.gif'
+
+    # Conversion metadata is taken from the attributes defined above.
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''  # the CSS string body is kept verbatim, including its one-space indent
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'nav'}),
+        dict(name='div', attrs={'id':'textarticol'}),
+    ]
+
+    feeds = [(u'Feeds', u'http://feeds.feedburner.com/Psychologies')]
+
+    def preprocess_html(self, soup):
+        """Return the parsed page after passing it through self.adeify_images()."""
+        return self.adeify_images(soup)
diff --git a/resources/recipes/publika.recipe b/resources/recipes/publika.recipe
new file mode 100644
index 0000000000..8380d02b17
--- /dev/null
+++ b/resources/recipes/publika.recipe
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+publika.md
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Publika(BasicNewsRecipe):
+    """calibre news recipe for publika.md, configured through class attributes."""
+    title = u'Publika'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'\u015etiri din Moldova'
+    publisher = u'Publika'
+    oldest_article = 25
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Stiri,Moldova'
+    encoding = 'utf-8'
+    cover_url = 'http://assets.publika.md/images/logo.jpg'
+
+    conversion_options = {  # reuses the attributes defined above
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    keep_only_tags = [
+        dict(name='div', attrs={'id':'colLeft'}),
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['articleInfo']}),
+        dict(name='div', attrs={'class':['articleRelated']}),
+        dict(name='div', attrs={'class':['roundedBox socialSharing']}),
+        dict(name='div', attrs={'class':['comment clearfix']}),
+    ]
+
+    remove_tags_after = [
+        dict(name='div', attrs={'class':['roundedBox socialSharing']}),
+        dict(name='div', attrs={'class':['comment clearfix']}),
+    ]
+
+    feeds = [(u'Feeds', u'http://rss.publika.md/stiri.xml')]
+
+    def preprocess_html(self, soup):
+        """Return the parsed page after passing it through self.adeify_images()."""
+        return self.adeify_images(soup)
diff --git a/resources/recipes/sltrib.py b/resources/recipes/sltrib.py
new file mode 100644
index 0000000000..a6701ae296
--- /dev/null
+++ b/resources/recipes/sltrib.py
@@ -0,0 +1,56 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1278347258(BasicNewsRecipe):
+    """calibre news recipe for the Salt Lake City Tribune (sltrib.com)."""
+    title = u'Salt Lake City Tribune'
+    __author__ = 'Charles Holbert'
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    description = '''Utah's independent news source since 1871'''
+    publisher = 'http://www.sltrib.com/'
+    category = 'news, Utah, SLC'
+    language = 'en'
+    encoding = 'utf-8'
+    #delay = 1
+    #simultaneous_downloads = 1
+    remove_javascript = True
+    use_embedded_content = False
+    no_stylesheets = True
+
+    #masthead_url = 'http://www.sltrib.com/csp/cms/sites/sltrib/assets/images/logo_main.png'
+    #cover_url = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg9/lg/UT_SLT.jpg'
+
+    keep_only_tags = [dict(name='div', attrs={'id':'imageBox'}),
+                      dict(name='div', attrs={'class':'headline'}),
+                      dict(name='div', attrs={'class':'byline'}),
+                      dict(name='p', attrs={'class':'TEXT_w_Indent'})]
+
+    feeds = [(u'SL Tribune Today', u'http://www.sltrib.com/csp/cms/sites/sltrib/RSS/rss.csp?cat=All'),
+             (u'Utah News', u'http://www.sltrib.com/csp/cms/sites/sltrib/RSS/rss.csp?cat=UtahNews'),
+             (u'Business News', u'http://www.sltrib.com/csp/cms/sites/sltrib/RSS/rss.csp?cat=Money'),
+             (u'Technology', u'http://www.sltrib.com/csp/cms/sites/sltrib/RSS/rss.csp?cat=Technology'),
+             (u'Most Popular', u'http://www.sltrib.com/csp/cms/sites/sltrib/RSS/rsspopular.csp'),
+             (u'Sports', u'http://www.sltrib.com/csp/cms/sites/sltrib/RSS/rss.csp?cat=Sports')]
+
+    extra_css = '''
+ .headline{font-family:Arial,Helvetica,sans-serif; font-size:xx-large; font-weight: bold; color:#0E5398;}
+ .byline{font-family:Arial,Helvetica,sans-serif; color:#333333; font-size:xx-small;}
+ .storytext{font-family:Arial,Helvetica,sans-serif; font-size:medium;}
+ '''  # the CSS string body is kept verbatim, including its one-space indent
+
+    def print_version(self, url):
+        """Build the printer-friendly URL from the id that starts element 5 of url.split('/')."""
+        path_parts = url.split('/')
+        article_id = path_parts[5].split('-')[0]  # text before the first '-'
+        return 'http://www.sltrib.com/csp/cms/sites/sltrib/pages/printerfriendly.csp?id=' + article_id
+
+    def get_cover_url(self):
+        """Return the front-page image URL found on the newseum.org page, or None if it is absent."""
+        href = 'http://www.newseum.org/todaysfrontpages/hr.asp?fpVname=UT_SLT&ref_pge=lst'
+        soup = self.index_to_soup(href)
+        div = soup.find('div', attrs={'class':'tfpLrgView_container'})
+        if div is not None and div.img is not None:  # a container without an <img> yields None
+            return div.img['src']
+        return None
+
diff --git a/resources/recipes/thai_post_daily.recipe b/resources/recipes/thai_post_daily.recipe
index 2be17cc37f..4523a9331a 100644
--- a/resources/recipes/thai_post_daily.recipe
+++ b/resources/recipes/thai_post_daily.recipe
@@ -3,6 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1299054026(BasicNewsRecipe):
title = u'Thai Post Daily'
__author__ = 'Chotechai P.'
+ language = 'th'
oldest_article = 7
max_articles_per_feed = 100
cover_url = 'http://upload.wikimedia.org/wikipedia/th/1/10/ThaiPost_Logo.png'
diff --git a/resources/recipes/timesnewroman.recipe b/resources/recipes/timesnewroman.recipe
new file mode 100644
index 0000000000..12672aa888
--- /dev/null
+++ b/resources/recipes/timesnewroman.recipe
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+timesnewroman.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TimesNewRoman(BasicNewsRecipe):
+    """calibre news recipe for timesnewroman.ro, configured through class attributes."""
+    title = u'Times New Roman'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'Cotidian independent de umor voluntar'
+    publisher = u'Times New Roman'
+    oldest_article = 25
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Reviste,Fun'
+    encoding = 'utf-8'
+    cover_url = 'http://www.timesnewroman.ro/templates/TNRV2/images/logo.gif'
+
+    conversion_options = {  # reuses the attributes defined above
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    keep_only_tags = [
+        dict(name='div', attrs={'id':'page'}),
+    ]
+
+    remove_tags = [
+        dict(name='p', attrs={'class':['articleinfo']}),
+        dict(name='div', attrs={'class':['vergefacebooklike']}),
+        dict(name='div', attrs={'class':'cleared'}),
+    ]
+
+    remove_tags_after = [
+        dict(name='div', attrs={'class':'cleared'}),
+    ]
+
+    feeds = [(u'Feeds', u'http://www.timesnewroman.ro/index.php?format=feed&type=rss')]
+
+    def preprocess_html(self, soup):
+        """Return the parsed page after passing it through self.adeify_images()."""
+        return self.adeify_images(soup)
diff --git a/resources/recipes/trombon.recipe b/resources/recipes/trombon.recipe
new file mode 100644
index 0000000000..1a4e488a43
--- /dev/null
+++ b/resources/recipes/trombon.recipe
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+trombon.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Trombon(BasicNewsRecipe):
+    """calibre news recipe for trombon.ro, configured through class attributes."""
+    title = u'Trombon'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'Parodii si Pamflete'
+    publisher = u'Trombon'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Reviste,Fun'
+    encoding = 'utf-8'
+    cover_url = 'http://www.trombon.ro/i/trombon.gif'
+
+    conversion_options = {  # reuses the attributes defined above
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'articol'}),
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['info_2']}),
+        dict(name='iframe', attrs={'scrolling':['no']}),
+    ]
+
+    remove_tags_after = [
+        dict(name='div', attrs={'id':'article_vote'}),
+    ]
+
+    feeds = [(u'Feeds', u'http://feeds.feedburner.com/trombon/ABWb?format=xml')]
+
+    def preprocess_html(self, soup):
+        """Return the parsed page after passing it through self.adeify_images()."""
+        return self.adeify_images(soup)
diff --git a/resources/recipes/tvmania.recipe b/resources/recipes/tvmania.recipe
new file mode 100644
index 0000000000..a2d9fcc060
--- /dev/null
+++ b/resources/recipes/tvmania.recipe
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+tvmania.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Tvmania(BasicNewsRecipe):
+    """calibre news recipe for tvmania.ro, configured through class attributes."""
+    title = u'TVmania'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'Programe TV'
+    publisher = u'TVmania'
+    oldest_article = 25
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Reviste,TV'
+    encoding = 'utf-8'
+    cover_url = 'http://www.tvmania.ro/wp-content/themes/tvmania/images/logo.png'
+
+    # Conversion metadata is taken from the attributes defined above.
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''  # the CSS string body is kept verbatim, including its one-space indent
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'articol'}),
+        dict(name='font', attrs={'class':'mic'}),
+        dict(name='div', attrs={'id':'header_recomandari'}),
+        dict(name='div', attrs={'class':'main-image'}),
+        dict(name='div', attrs={'id':'articol_recomandare'}),
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['iLikeThis']}),
+        dict(name='span', attrs={'class':['tag-links']}),
+    ]
+
+    remove_tags_after = [
+        dict(name='div', attrs={'class':['iLikeThis']}),
+        dict(name='span', attrs={'class':['tag-links']}),
+    ]
+
+    feeds = [(u'Feeds', u'http://www.tvmania.ro/feed')]
+
+    def preprocess_html(self, soup):
+        """Return the parsed page after passing it through self.adeify_images()."""
+        return self.adeify_images(soup)
diff --git a/resources/recipes/viva.recipe b/resources/recipes/viva.recipe
new file mode 100644
index 0000000000..df697ea298
--- /dev/null
+++ b/resources/recipes/viva.recipe
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+viva.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Viva(BasicNewsRecipe):
+    """calibre news recipe for viva.ro, configured through class attributes."""
+    title = u'Viva'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'Vedete si evenimente'
+    publisher = u'Viva'
+    oldest_article = 25
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Reviste,Femei'
+    encoding = 'utf-8'
+    cover_url = 'http://www.viva.ro/images/default/viva.gif'
+
+    conversion_options = {  # reuses the attributes defined above
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''  # the CSS string body is kept verbatim, including its one-space indent
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'articol'}),
+        dict(name='div', attrs={'class':'gallery clearfix'}),
+        dict(name='div', attrs={'align':'justify'}),
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['breadcrumbs']}),
+        dict(name='div', attrs={'class':['links clearfix']}),
+        dict(name='a', attrs={'id':['img_arrow_right']}),
+        dict(name='img', attrs={'id':['zoom']}),
+        dict(name='div', attrs={'class':['foto_counter']}),
+        dict(name='div', attrs={'class':['gal_select clearfix']}),
+    ]
+
+    remove_tags_after = [
+        dict(name='div', attrs={'class':['links clearfix']}),
+    ]
+
+    feeds = [(u'Vedete', u'http://feeds.feedburner.com/viva-Vedete'),
+             (u'Evenimente', u'http://feeds.feedburner.com/viva-Evenimente'),
+             (u'Frumusete', u'http://feeds.feedburner.com/viva-Beauty-Fashion'),
+             (u'Noutati', u'http://feeds.feedburner.com/viva-Noutati')]
+
+    def preprocess_html(self, soup):
+        """Return the parsed page after passing it through self.adeify_images()."""
+        return self.adeify_images(soup)
diff --git a/resources/recipes/wallstreetro.recipe b/resources/recipes/wallstreetro.recipe
new file mode 100644
index 0000000000..8a66aa3673
--- /dev/null
+++ b/resources/recipes/wallstreetro.recipe
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+wall-street.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class WallStreetRo(BasicNewsRecipe):
+    """calibre news recipe for wall-street.ro, configured through class attributes."""
+    title = u'Wall Street'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = ''
+    publisher = 'Wall Street'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare'
+    encoding = 'utf-8'
+    cover_url = 'http://img.wall-street.ro/images/WS_new_logo.jpg'
+
+    conversion_options = {  # reuses the attributes defined above
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'article_header'}),
+        dict(name='div', attrs={'class':'article_text'}),
+    ]
+
+    remove_tags = [
+        dict(name='p', attrs={'class':['page_breadcrumbs']}),
+        dict(name='div', attrs={'id':['article_user_toolbox']}),
+        dict(name='p', attrs={'class':['comments_count_container']}),
+        dict(name='div', attrs={'class':['article_left_column']}),
+    ]
+
+    remove_tags_after = [
+        dict(name='div', attrs={'class':'clearfloat'}),
+    ]
+
+    feeds = [(u'Feeds', u'http://img.wall-street.ro/rssfeeds/wall-street.xml')]
+
+    def preprocess_html(self, soup):
+        """Return the parsed page after passing it through self.adeify_images()."""
+        return self.adeify_images(soup)
diff --git a/resources/template-functions.json b/resources/template-functions.json
index fe4379d701..c19627c6c7 100644
--- a/resources/template-functions.json
+++ b/resources/template-functions.json
@@ -16,6 +16,7 @@
"template": "def evaluate(self, formatter, kwargs, mi, locals, template):\n template = template.replace('[[', '{').replace(']]', '}')\n return formatter.__class__().safe_format(template, kwargs, 'TEMPLATE', mi)\n",
"print": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n print args\n return None\n",
"titlecase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return titlecase(val)\n",
+ "subitems": "def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index):\n if not val:\n return ''\n si = int(start_index)\n ei = int(end_index)\n items = [v.strip() for v in val.split(',')]\n rv = set()\n for item in items:\n component = item.split('.')\n try:\n if ei == 0:\n rv.add('.'.join(component[si:]))\n else:\n rv.add('.'.join(component[si:ei]))\n except:\n pass\n return ', '.join(sorted(rv, key=sort_key))\n",
"sublist": "def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):\n if not val:\n return ''\n si = int(start_index)\n ei = int(end_index)\n val = val.split(sep)\n try:\n if ei == 0:\n return sep.join(val[si:])\n else:\n return sep.join(val[si:ei])\n except:\n return ''\n",
"test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n if val:\n return value_if_set\n else:\n return value_not_set\n",
"eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n from formatter import eval_formatter\n template = template.replace('[[', '{').replace(']]', '}')\n return eval_formatter.safe_format(template, locals, 'EVAL', None)\n",
diff --git a/resources/templates/fb2.xsl b/resources/templates/fb2.xsl
index 77c03cdc74..273edd71ae 100644
--- a/resources/templates/fb2.xsl
+++ b/resources/templates/fb2.xsl
@@ -4,6 +4,7 @@
# #
# #
# copyright 2002 Paul Henry Tremblay #
+# Copyright 2011 Kovid Goyal
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
@@ -19,21 +20,21 @@
#########################################################################
-->
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+