Merge from trunk
@ -2,6 +2,7 @@
|
||||
.check-cache.pickle
|
||||
src/calibre/plugins
|
||||
resources/images.qrc
|
||||
src/calibre/ebooks/oeb/display/test/*.js
|
||||
src/calibre/manual/.build/
|
||||
src/calibre/manual/cli/
|
||||
src/calibre/manual/template_ref.rst
|
||||
@ -15,6 +16,7 @@ resources/ebook-convert-complete.pickle
|
||||
resources/builtin_recipes.xml
|
||||
resources/builtin_recipes.zip
|
||||
resources/template-functions.json
|
||||
resources/display/*.js
|
||||
setup/installer/windows/calibre/build.log
|
||||
src/calibre/translations/.errors
|
||||
src/cssutils/.svn/
|
||||
|
4210
Changelog.old.yaml
5071
Changelog.yaml
152
imgsrc/calibreSymbols.spd
Normal file
@ -0,0 +1,152 @@
|
||||
SplineFontDB: 3.0
|
||||
FontName: calibreSymbols
|
||||
FullName: calibre Symbols
|
||||
FamilyName: calibre Symbols
|
||||
Weight: Medium
|
||||
Copyright: Created by Kovid Goyal with FontForge 2.0 (http://fontforge.sf.net)
|
||||
UComments: "2012-2-27: Created."
|
||||
Version: 001.000
|
||||
ItalicAngle: 0
|
||||
UnderlinePosition: -100
|
||||
UnderlineWidth: 50
|
||||
Ascent: 800
|
||||
Descent: 200
|
||||
LayerCount: 2
|
||||
Layer: 0 0 "Back" 1
|
||||
Layer: 1 0 "Fore" 0
|
||||
NeedsXUIDChange: 1
|
||||
XUID: [1021 913 325894820 11538708]
|
||||
FSType: 0
|
||||
OS2Version: 0
|
||||
OS2_WeightWidthSlopeOnly: 0
|
||||
OS2_UseTypoMetrics: 1
|
||||
CreationTime: 1330331997
|
||||
ModificationTime: 1330487767
|
||||
OS2TypoAscent: 0
|
||||
OS2TypoAOffset: 1
|
||||
OS2TypoDescent: 0
|
||||
OS2TypoDOffset: 1
|
||||
OS2TypoLinegap: 90
|
||||
OS2WinAscent: 0
|
||||
OS2WinAOffset: 1
|
||||
OS2WinDescent: 0
|
||||
OS2WinDOffset: 1
|
||||
HheadAscent: 0
|
||||
HheadAOffset: 1
|
||||
HheadDescent: 0
|
||||
HheadDOffset: 1
|
||||
MarkAttachClasses: 1
|
||||
DEI: 91125
|
||||
Encoding: UnicodeFull
|
||||
UnicodeInterp: none
|
||||
NameList: Adobe Glyph List
|
||||
DisplaySize: -24
|
||||
AntiAlias: 1
|
||||
FitToEm: 1
|
||||
WidthSeparation: 150
|
||||
WinInfo: 9600 75 22
|
||||
BeginPrivate: 0
|
||||
EndPrivate
|
||||
BeginChars: 1114112 3
|
||||
|
||||
StartChar: uni2605
|
||||
Encoding: 9733 9733 0
|
||||
Width: 979
|
||||
VWidth: -26
|
||||
Flags: W
|
||||
LayerCount: 2
|
||||
Fore
|
||||
SplineSet
|
||||
551.923 352.862 m 1
|
||||
749.497 369.592 l 2
|
||||
804.954 374.123 833.379 376.389 834.765 376.389 c 0
|
||||
852.095 376.389 860.761 368.896 860.761 353.907 c 0
|
||||
860.761 347.981 859.028 343.363 855.562 340.052 c 0
|
||||
852.095 336.74 825.578 319.225 776.012 287.506 c 2
|
||||
609.635 180.323 l 1
|
||||
716.22 -88.417 l 2
|
||||
717.606 -91.2051 718.301 -95.3877 718.301 -100.965 c 0
|
||||
718.301 -106.193 716.394 -110.725 712.58 -114.558 c 0
|
||||
708.769 -118.393 704.608 -120.31 700.104 -120.31 c 0
|
||||
695.943 -120.31 691.61 -118.828 687.103 -115.866 c 0
|
||||
682.598 -112.902 658.162 -92.251 613.795 -53.9082 c 2
|
||||
466.134 74.71 l 1
|
||||
320.554 -51.8184 l 2
|
||||
274.802 -91.5547 249.758 -112.902 245.426 -115.866 c 0
|
||||
241.092 -118.828 236.846 -120.31 232.688 -120.31 c 0
|
||||
227.835 -120.31 223.415 -118.306 219.429 -114.297 c 0
|
||||
215.442 -110.289 213.449 -105.844 213.449 -100.965 c 0
|
||||
213.449 -97.8281 223.329 -71.3379 243.087 -21.4932 c 2
|
||||
322.115 180.323 l 1
|
||||
152.618 289.598 l 2
|
||||
104.783 320.271 79.2217 337.176 75.9297 340.313 c 0
|
||||
72.6357 343.45 70.9893 347.981 70.9893 353.907 c 0
|
||||
70.9893 369.243 79.8291 376.912 97.5059 376.912 c 0
|
||||
98.8926 376.912 123.155 374.82 170.296 370.638 c 2
|
||||
379.825 352.862 l 1
|
||||
427.14 555.201 l 2
|
||||
439.271 607.834 446.811 636.764 449.757 641.992 c 0
|
||||
452.702 647.221 458.162 649.834 466.134 649.834 c 0
|
||||
474.454 649.834 480 646.96 482.772 641.208 c 0
|
||||
485.545 635.457 493.518 604.173 506.689 547.357 c 2
|
||||
551.923 352.862 l 1
|
||||
EndSplineSet
|
||||
Validated: 524289
|
||||
EndChar
|
||||
|
||||
StartChar: zero
|
||||
Encoding: 48 48 1
|
||||
Width: 1303
|
||||
VWidth: 2048
|
||||
Flags: W
|
||||
HStem: -43.3789 76.7998<582.097 721.09> 623.341 76.7998<582.097 721.091>
|
||||
VStem: 403.82 97.4395<148.044 508.66> 802.221 96.959<148.044 508.659>
|
||||
LayerCount: 2
|
||||
Fore
|
||||
SplineSet
|
||||
651.5 623.341 m 0
|
||||
601.58 623.341 564.061 598.78 538.939 549.66 c 0
|
||||
513.82 500.541 501.26 426.7 501.26 328.141 c 0
|
||||
501.26 229.9 513.82 156.221 538.939 107.101 c 0
|
||||
564.061 57.9805 601.58 33.4209 651.5 33.4209 c 0
|
||||
701.74 33.4209 739.42 57.9805 764.54 107.101 c 0
|
||||
789.66 156.221 802.221 229.9 802.221 328.141 c 0
|
||||
802.221 426.7 789.66 500.541 764.54 549.66 c 0
|
||||
739.42 598.78 701.74 623.341 651.5 623.341 c 0
|
||||
651.5 700.141 m 0
|
||||
731.82 700.141 793.18 668.38 835.58 604.859 c 0
|
||||
877.979 541.341 899.18 449.101 899.18 328.141 c 0
|
||||
899.18 207.5 877.979 115.421 835.58 51.9004 c 0
|
||||
793.18 -11.6201 731.819 -43.3789 651.5 -43.3789 c 0
|
||||
571.18 -43.3789 509.82 -11.6201 467.42 51.9004 c 0
|
||||
425.021 115.421 403.82 207.5 403.82 328.141 c 0
|
||||
403.82 449.101 425.021 541.341 467.42 604.859 c 0
|
||||
509.82 668.38 571.18 700.141 651.5 700.141 c 0
|
||||
EndSplineSet
|
||||
Validated: 1
|
||||
EndChar
|
||||
|
||||
StartChar: period
|
||||
Encoding: 46 46 2
|
||||
Width: 516
|
||||
VWidth: 2048
|
||||
Flags: W
|
||||
HStem: 53.4004 166.199<203.263 309.297>
|
||||
VStem: 174.6 163.801<82.9501 190.955>
|
||||
LayerCount: 2
|
||||
Fore
|
||||
SplineSet
|
||||
338.4 142.8 m 0
|
||||
338.4 119.2 330.5 98.4004 314.7 80.4004 c 0
|
||||
298.9 62.4004 277 53.4004 249 53.4004 c 0
|
||||
225.4 53.4004 207.1 61.2002 194.1 76.7998 c 0
|
||||
181.1 92.4004 174.6 111 174.6 132.6 c 0
|
||||
174.6 155.8 182.6 176.1 198.6 193.5 c 0
|
||||
214.6 210.9 236.8 219.6 265.2 219.6 c 0
|
||||
288.8 219.6 306.9 212.2 319.5 197.4 c 0
|
||||
332.1 182.6 338.4 164.4 338.4 142.8 c 0
|
||||
EndSplineSet
|
||||
Validated: 1
|
||||
EndChar
|
||||
EndChars
|
||||
EndSplineFont
|
@ -1,19 +1,38 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
import re
|
||||
class Adventure_zone(BasicNewsRecipe):
|
||||
title = u'Adventure Zone'
|
||||
__author__ = 'fenuks'
|
||||
description = 'Adventure zone - adventure games from A to Z'
|
||||
category = 'games'
|
||||
language = 'pl'
|
||||
oldest_article = 15
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
oldest_article = 20
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content=False
|
||||
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
|
||||
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
|
||||
remove_tags_after= dict(name='td', attrs={'class':'main-body middle-border'})
|
||||
remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
|
||||
remove_tags_after= dict(id='comments')
|
||||
extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
|
||||
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
|
||||
|
||||
def parse_feeds (self):
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
soup=self.index_to_soup(u'http://www.adventure-zone.info/fusion/feeds/news.php')
|
||||
tag=soup.find(name='channel')
|
||||
titles=[]
|
||||
for r in tag.findAll(name='image'):
|
||||
r.extract()
|
||||
art=tag.findAll(name='item')
|
||||
for i in art:
|
||||
titles.append(i.title.string)
|
||||
for feed in feeds:
|
||||
for article in feed.articles[:]:
|
||||
article.title=titles[feed.articles.index(article)]
|
||||
return feeds
|
||||
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
|
||||
cover=soup.find(id='box_OstatninumerAZ')
|
||||
@ -22,17 +41,10 @@ class Adventure_zone(BasicNewsRecipe):
|
||||
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
skip_tag = soup.body.findAll(name='a')
|
||||
if skip_tag is not None:
|
||||
skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
|
||||
skip_tag = skip_tag.findAll(name='a')
|
||||
for r in skip_tag:
|
||||
if 'articles.php?' in r['href']:
|
||||
if r.strong is not None:
|
||||
if r.strong:
|
||||
word=r.strong.string
|
||||
if ('zapowied' or 'recenzj') in word:
|
||||
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item_id'+r['href'][r['href'].find('_id')+3:], raw=True)
|
||||
else:
|
||||
None
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('news.php?readmore', 'print.php?type=N&item_id')
|
||||
|
||||
if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)):
|
||||
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
|
50
recipes/al_masry_al_youm.recipe
Normal file
@ -0,0 +1,50 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
|
||||
'''
|
||||
abc.net.au/news
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class TheDailyNewsEG(BasicNewsRecipe):
|
||||
title = u'al-masry al-youm'
|
||||
__author__ = 'Omm Mishmishah'
|
||||
description = 'Independent News from Egypt'
|
||||
masthead_url = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'
|
||||
cover_url = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'
|
||||
|
||||
auto_cleanup = True
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = False
|
||||
#delay = 1
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
publisher = 'Independent News Egypt'
|
||||
category = 'News, Egypt, World'
|
||||
language = 'en_EG'
|
||||
publication_type = 'newsportal'
|
||||
# preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
||||
#Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
|
||||
preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
,'linearize_tables': False
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':['article section']})]
|
||||
|
||||
remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
|
||||
'inline-content story left', 'inline-content map left contracted', 'published',
|
||||
'story-map', 'statepromo', 'topics', ]})]
|
||||
|
||||
remove_attributes = ['width','height']
|
||||
|
||||
feeds = [(u'English News', u'http://www.almasryalyoum.com/en/rss_feed_term/113/rss.xml'),
|
||||
(u'News Features', u'http://www.almasryalyoum.com/en/rss_feed_term/115/rss.xml'),
|
||||
(u'Culture', u'http://www.almasryalyoum.com/en/rss_feed_term/133/rss.xml'),
|
||||
(u'Cinema', u'http://www.almasryalyoum.com/en/rss_feed_term/134/rss.xml')
|
||||
]
|
18
recipes/albert_mohler.recipe
Normal file
@ -0,0 +1,18 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AlbertMohlersBlog(BasicNewsRecipe):
|
||||
title = u'Albert Mohler\'s Blog'
|
||||
__author__ = 'Peter Grungi'
|
||||
language = 'en'
|
||||
oldest_article = 90
|
||||
max_articles_per_feed = 10
|
||||
auto_cleanup = True
|
||||
cover_url = 'http://www.albertmohler.com/wp-content/themes/albert-mohler-v5/img/logo-am-lg.gif'
|
||||
publisher = 'Albert Mohler'
|
||||
language = 'en'
|
||||
author = 'Albert Mohler'
|
||||
|
||||
feeds = [(u'Albert Mohler\'s Blog', u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]
|
@ -36,3 +36,5 @@ class Alternet(BasicNewsRecipe):
|
||||
self.temp_files[-1].write(html)
|
||||
self.temp_files[-1].close()
|
||||
return self.temp_files[-1].name
|
||||
|
||||
conversion_options = {'linearize_tables': True}
|
||||
|
@ -11,7 +11,6 @@ class AssociatedPress(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
no_stylesheets = True
|
||||
max_articles_per_feed = 15
|
||||
html2lrf_options = ['--force-page-break-before-tag="chapter"']
|
||||
|
||||
|
||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||
|
@ -7,6 +7,7 @@ class Archeowiesci(BasicNewsRecipe):
|
||||
language = 'pl'
|
||||
cover_url='http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
|
||||
oldest_article = 7
|
||||
needs_subscription='optional'
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
remove_tags=[dict(name='span', attrs={'class':['post-ratings', 'post-ratings-loading']})]
|
||||
@ -16,6 +17,16 @@ class Archeowiesci(BasicNewsRecipe):
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
for feed in feeds:
|
||||
for article in feed.articles[:]:
|
||||
if 'subskrypcja' in article.title:
|
||||
if self.username is None and 'subskrypcja' in article.title:
|
||||
feed.articles.remove(article)
|
||||
return feeds
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://archeowiesci.pl/wp-login.php')
|
||||
br.select_form(name='loginform')
|
||||
br['log'] = self.username
|
||||
br['pwd'] = self.password
|
||||
br.submit()
|
||||
return br
|
51
recipes/asianreviewofbooks.recipe
Normal file
@ -0,0 +1,51 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.asianreviewofbooks.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AsianReviewOfBooks(BasicNewsRecipe):
|
||||
title = 'The Asian Review of Books'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.'
|
||||
publisher = 'The Asian Review of Books'
|
||||
category = 'literature, books, reviews, Asia'
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
language = 'en_HK'
|
||||
publication_type = 'magazine'
|
||||
masthead_url = 'http://www.asianreviewofbooks.com/new/images/mob_arb.png'
|
||||
extra_css = """
|
||||
body{font-family: serif}
|
||||
.big {font-size: xx-large}
|
||||
.bold {font-weight: bold}
|
||||
.italic {font-style: italic}
|
||||
.small {font-size: small}
|
||||
img {display: block}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
|
||||
remove_tags = [dict(name=['object','script','iframe','embed'])]
|
||||
remove_attributes = ['style', 'onclick']
|
||||
feeds = [(u'Articles' , u'http://www.asianreviewofbooks.com/new/rss.php')]
|
||||
|
||||
def print_version(self, url):
|
||||
root, sep, artid = url.rpartition('?ID=')
|
||||
return root + 'getarticle.php?articleID=' + artid + '&stats=web'
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
return '<html><head><title>title</title></head><body>' + raw + '</body></html>'
|
||||
|
@ -1,5 +1,4 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AstroNEWS(BasicNewsRecipe):
|
||||
title = u'AstroNEWS'
|
||||
__author__ = 'fenuks'
|
||||
@ -8,11 +7,16 @@ class AstroNEWS(BasicNewsRecipe):
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
#extra_css= 'table {text-align: left;}'
|
||||
no_stylesheets=True
|
||||
cover_url='http://news.astronet.pl/img/logo_news.jpg'
|
||||
# no_stylesheets= True
|
||||
remove_tags=[dict(name='hr')]
|
||||
feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(align=True):
|
||||
del item['align']
|
||||
return soup
|
||||
|
@ -1,15 +1,18 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
import re
|
||||
class Astronomia_pl(BasicNewsRecipe):
|
||||
title = u'Astronomia.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = 'Astronomia - polish astronomy site'
|
||||
masthead_url = 'http://www.astronomia.pl/grafika/logo.gif'
|
||||
cover_url = 'http://www.astronomia.pl/grafika/logo.gif'
|
||||
category = 'astronomy, science'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
#no_stylesheets=True
|
||||
extra_css='#h2 {font-size: 18px;}'
|
||||
no_stylesheets=True
|
||||
preprocess_regexps = [(re.compile(ur'<b>Przeczytaj także:.*?</BODY>', re.DOTALL), lambda match: '</BODY>') ]
|
||||
remove_tags_before=dict(name='div', attrs={'id':'a1'})
|
||||
keep_only_tags=[dict(name='div', attrs={'id':['a1', 'h2']})]
|
||||
feeds = [(u'Wiadomości z astronomii i astronautyki', u'http://www.astronomia.pl/rss/')]
|
||||
|
@ -4,16 +4,17 @@ class Benchmark_pl(BasicNewsRecipe):
|
||||
title = u'Benchmark.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = u'benchmark.pl -IT site'
|
||||
masthead_url = 'http://www.benchmark.pl/i/logo-footer.png'
|
||||
cover_url = 'http://www.ieaddons.pl/benchmark/logo_benchmark_new.gif'
|
||||
category = 'IT'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets=True
|
||||
preprocess_regexps = [(re.compile(ur'\bWięcej o .*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
|
||||
preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
|
||||
keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']})]
|
||||
remove_tags_after=dict(name='div', attrs={'class':'body'})
|
||||
remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']})]
|
||||
remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']}), dict(name='table', attrs={'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
|
||||
INDEX= 'http://www.benchmark.pl'
|
||||
feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
|
||||
(u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]
|
||||
|
16
recipes/beppe_grillo.recipe
Normal file
@ -0,0 +1,16 @@
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1327747616(BasicNewsRecipe):
|
||||
title = u'Beppe Grillo'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Beppe Grillo', u'http://feeds.feedburner.com/beppegrillo/atom')]
|
||||
description = 'Blog of the famous comedian and politician Beppe Grillo - v1.00 (28, January 2012)'
|
||||
__author__ = 'faber1971'
|
||||
|
||||
language = 'it'
|
||||
|
@ -1,61 +1,44 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
'''Calibre recipe to convert the RSS feeds of the Berliner Zeitung to an ebook.'''
|
||||
|
||||
class SportsIllustratedRecipe(BasicNewsRecipe) :
|
||||
__author__ = 'ape'
|
||||
__copyright__ = 'ape'
|
||||
__author__ = 'a.peter'
|
||||
__copyright__ = 'a.peter'
|
||||
__license__ = 'GPL v3'
|
||||
language = 'de'
|
||||
description = 'Berliner Zeitung'
|
||||
version = 2
|
||||
description = 'Berliner Zeitung RSS'
|
||||
version = 4
|
||||
title = u'Berliner Zeitung'
|
||||
timefmt = ' [%d.%m.%Y]'
|
||||
|
||||
#oldest_article = 7.0
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
publication_type = 'newspaper'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'teaser t_split t_artikel'})]
|
||||
remove_tags_before = dict(name='div', attrs={'class':'newstype'})
|
||||
remove_tags_after = [dict(id='article_text')]
|
||||
|
||||
INDEX = 'http://www.berlinonline.de/berliner-zeitung/'
|
||||
|
||||
def parse_index(self):
|
||||
base = 'http://www.berlinonline.de'
|
||||
answer = []
|
||||
articles = {}
|
||||
more = 1
|
||||
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
|
||||
# Get list of links to ressorts from index page
|
||||
ressort_list = soup.findAll('ul', attrs={'class': re.compile('ressortlist')})
|
||||
for ressort in ressort_list[0].findAll('a'):
|
||||
feed_title = ressort.string
|
||||
print 'Analyzing', feed_title
|
||||
if not articles.has_key(feed_title):
|
||||
articles[feed_title] = []
|
||||
answer.append(feed_title)
|
||||
# Load ressort page.
|
||||
feed = self.index_to_soup('http://www.berlinonline.de' + ressort['href'])
|
||||
# find mainbar div which contains the list of all articles
|
||||
for article_container in feed.findAll('div', attrs={'class': re.compile('mainbar')}):
|
||||
# iterate over all articles
|
||||
for article_teaser in article_container.findAll('div', attrs={'class': re.compile('teaser')}):
|
||||
# extract title of article
|
||||
if article_teaser.h3 != None:
|
||||
article = {'title' : article_teaser.h3.a.string, 'date' : u'', 'url' : base + article_teaser.h3.a['href'], 'description' : u''}
|
||||
articles[feed_title].append(article)
|
||||
else:
|
||||
# Skip teasers for missing photos
|
||||
if article_teaser.div.p.contents[0].find('Foto:') > -1:
|
||||
continue
|
||||
article = {'title': 'Weitere Artikel ' + str(more), 'date': u'', 'url': base + article_teaser.div.p.a['href'], 'description': u''}
|
||||
articles[feed_title].append(article)
|
||||
more += 1
|
||||
answer = [[key, articles[key]] for key in answer if articles.has_key(key)]
|
||||
return answer
|
||||
feeds = [(u'Startseite', u'http://www.berliner-zeitung.de/home/10808950,10808950,view,asFeed.xml'),
|
||||
(u'Politik', u'http://www.berliner-zeitung.de/home/10808018,10808018,view,asFeed.xml'),
|
||||
(u'Wirtschaft', u'http://www.berliner-zeitung.de/home/10808230,10808230,view,asFeed.xml'),
|
||||
(u'Berlin', u'http://www.berliner-zeitung.de/home/10809148,10809148,view,asFeed.xml'),
|
||||
(u'Brandenburg', u'http://www.berliner-zeitung.de/home/10809312,10809312,view,asFeed.xml'),
|
||||
(u'Wissenschaft', u'http://www.berliner-zeitung.de/home/10808894,10808894,view,asFeed.xml'),
|
||||
(u'Digital', u'http://www.berliner-zeitung.de/home/10808718,10808718,view,asFeed.xml'),
|
||||
(u'Kultur', u'http://www.berliner-zeitung.de/home/10809150,10809150,view,asFeed.xml'),
|
||||
(u'Panorama', u'http://www.berliner-zeitung.de/home/10808334,10808334,view,asFeed.xml'),
|
||||
(u'Sport', u'http://www.berliner-zeitung.de/home/10808794,10808794,view,asFeed.xml'),
|
||||
(u'Hertha', u'http://www.berliner-zeitung.de/home/10808800,10808800,view,asFeed.xml'),
|
||||
(u'Union', u'http://www.berliner-zeitung.de/home/10808802,10808802,view,asFeed.xml'),
|
||||
(u'Verkehr', u'http://www.berliner-zeitung.de/home/10809298,10809298,view,asFeed.xml'),
|
||||
(u'Polizei', u'http://www.berliner-zeitung.de/home/10809296,10809296,view,asFeed.xml'),
|
||||
(u'Meinung', u'http://www.berliner-zeitung.de/home/10808020,10808020,view,asFeed.xml')]
|
||||
|
||||
def get_masthead_url(self):
|
||||
return 'http://www.berlinonline.de/.img/berliner-zeitung/blz_logo.gif'
|
||||
return 'http://www.berliner-zeitung.de/image/view/10810244,7040611,data,logo.png'
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('.html', ',view,printVersion.html')
|
||||
|
20
recipes/biolog_pl.recipe
Normal file
@ -0,0 +1,20 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Biolog_pl(BasicNewsRecipe):
|
||||
title = u'Biolog.pl'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
remove_empty_feeds=True
|
||||
__author__ = 'fenuks'
|
||||
description = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
|
||||
category = 'biology'
|
||||
language = 'pl'
|
||||
masthead_url= 'http://www.biolog.pl/naukowy,portal,biolog.png'
|
||||
cover_url='http://www.biolog.pl/naukowy,portal,biolog.png'
|
||||
no_stylesheets = True
|
||||
#keeps_only_tags=[dict(id='main')]
|
||||
remove_tags_before=dict(id='main')
|
||||
remove_tags_after=dict(name='a', attrs={'name':'komentarze'})
|
||||
remove_tags=[dict(name='img', attrs={'alt':'Komentarze'}), dict(name='span', attrs={'class':'menu_odsylacze'})]
|
||||
feeds = [(u'Wszystkie', u'http://www.biolog.pl/backend.php'), (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')]
|
44
recipes/birmingham_post.recipe
Normal file
@ -0,0 +1,44 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
title = u'Birmingham post'
|
||||
description = 'News for Birmingham UK'
|
||||
timefmt = ''
|
||||
__author__ = 'Dave Asbury'
|
||||
cover_url = 'http://1.bp.blogspot.com/_GwWyq5eGw9M/S9BHPHxW55I/AAAAAAAAB6Q/iGCWl0egGzg/s320/Birmingham+post+Lite+front.JPG'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 20
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
auto_cleanup = True
|
||||
language = 'en_GB'
|
||||
|
||||
|
||||
masthead_url = 'http://www.pressgazette.co.uk/Pictures/web/t/c/g/birmingham_post.jpg'
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
#dict(name='h1',attrs={'id' : 'article-headline'}),
|
||||
#dict(attrs={'class':['article-meta-author','article-meta-date','article main','art-o art-align-center otm-1 ']}),
|
||||
#dict(name='p')
|
||||
#dict(attrs={'id' : 'three-col'})
|
||||
]
|
||||
remove_tags = [
|
||||
# dict(name='div',attrs={'class' : 'span-33 last header-links'})
|
||||
|
||||
]
|
||||
feeds = [
|
||||
#(u'News',u'http://www.birminghampost.net/news/rss.xml'),
|
||||
(u'Local News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
|
||||
(u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
|
||||
(u'Sports',u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
|
||||
(u'Bloggs & Comments',u'http://www.birminghampost.net/comment/rss.xml')
|
||||
|
||||
]
|
||||
extra_css = '''
|
||||
body {font: sans-serif medium;}'
|
||||
h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
|
||||
h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
|
||||
span{ font-size:9.5px; font-weight:bold;font-style:italic}
|
||||
p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
||||
|
||||
'''
|
@ -1,6 +1,6 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
blic.rs
|
||||
'''
|
||||
@ -73,7 +73,10 @@ class Blic(BasicNewsRecipe):
|
||||
def print_version(self, url):
|
||||
return url + '/print'
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.blic.rs/')
|
||||
alink = soup.find('a', attrs={'id':'blic_naslovna_print'})
|
||||
if alink:
|
||||
return 'http://www.blic.rs' + alink['href']
|
||||
return None
|
||||
|
26
recipes/blues.recipe
Normal file
@ -0,0 +1,26 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Oskar Kunicki <rakso at interia.pl>'
|
||||
'''
|
||||
Changelog:
|
||||
2011-11-27
|
||||
News from BluesRSS.info
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BluesRSS(BasicNewsRecipe):
|
||||
title = 'Blues News'
|
||||
__author__ = 'Oskar Kunicki'
|
||||
description ='Blues news from around the world'
|
||||
publisher = 'BluesRSS.info'
|
||||
category = 'news, blues, USA,UK'
|
||||
oldest_article = 5
|
||||
max_articles_per_feed = 100
|
||||
language = 'en'
|
||||
cover_url = 'http://bluesrss.info/cover.jpg'
|
||||
masthead_url = 'http://bluesrss.info/cover.jpg'
|
||||
no_stylesheets = True
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':'wp-pagenavi'})]
|
||||
|
||||
feeds = [(u'News', u'http://bluesrss.info/feed/')]
|
@ -1,95 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
borba.rs
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Borba(BasicNewsRecipe):
|
||||
title = 'Borba Online'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Dnevne novine Borba Online'
|
||||
publisher = 'IP Novine Borba'
|
||||
category = 'news, politics, Serbia'
|
||||
language = 'sr'
|
||||
|
||||
lang = _('sr-Latn-RS')
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
|
||||
INDEX = u'http://www.borba.rs/'
|
||||
extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .contentheading{font-size: x-large; font-weight: bold} .createdate{font-size: small; font-weight: bold} '
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'main'})]
|
||||
|
||||
remove_tags_after = dict(name='div',attrs={'id':'written_comments_title'})
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link','iframe','base','img'])
|
||||
,dict(name='div',attrs={'id':'written_comments_title'})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Najnovije vesti', u'http://www.borba.rs/content/blogsection/28/105/')
|
||||
,(u'Prvi plan' , u'http://www.borba.rs/content/blogsection/4/92/' )
|
||||
,(u'Dogadjaji' , u'http://www.borba.rs/content/blogsection/21/83/' )
|
||||
,(u'Ekonomija' , u'http://www.borba.rs/content/blogsection/5/35/' )
|
||||
,(u'Komentari' , u'http://www.borba.rs/content/blogsection/23/94/' )
|
||||
,(u'Svet' , u'http://www.borba.rs/content/blogsection/7/36/' )
|
||||
,(u'Sport' , u'http://www.borba.rs/content/blogsection/6/37/' )
|
||||
,(u'Fama' , u'http://www.borba.rs/content/blogsection/25/89/' )
|
||||
,(u'B2 Dodatak' , u'http://www.borba.rs/content/blogsection/30/116/')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
    """Flatten table markup into plain ``div`` elements.

    Every table-related tag found under ``soup.body`` is renamed to ``div``
    and has its presentational attributes removed.  The soup is modified in
    place and the same object is returned.
    """
    table_tags = ['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']
    presentational = (
        'style', 'font', 'valign',
        'colspan', 'width', 'height',
        'rowspan', 'summary', 'align',
        'cellspacing', 'cellpadding',
        'frames', 'rules', 'border',
    )
    for node in soup.body.findAll(name=table_tags):
        node.name = 'div'
        # Collect the attributes that are actually present, then drop them.
        present = [attr_name for attr_name in presentational if node.has_key(attr_name)]
        for attr_name in present:
            del node[attr_name]
    return soup
|
||||
|
||||
def parse_index(self):
    """Build the feed index by scraping each section page.

    For every ``(title, url)`` pair returned by ``self.get_feeds()`` the
    section page is fetched and each ``<a class="contentpagetitle">`` link
    becomes one article entry.  Returns a list of ``(feed title, articles)``
    tuples; the date and description of every article are left empty.
    """
    sections = []
    for feedtitle, feedurl in self.get_feeds():
        # Fall back to the URL in the progress message when the title is empty.
        label = feedtitle or feedurl
        self.report_progress(0, _('Fetching feed') + ' %s...' % label)
        listing = self.index_to_soup(feedurl)
        entries = []
        for anchor in listing.findAll('a', attrs={'class':'contentpagetitle'}):
            link = anchor['href']
            headline = self.tag_to_string(anchor)
            entries.append({
                'title': headline,
                'date': '',
                'url': link,
                'description': '',
            })
        sections.append((feedtitle, entries))
    return sections
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
@ -6,45 +7,76 @@ __license__ = 'GPL v3'
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
# un-comment the following three lines for the Calgary Herald
|
||||
# un-comment the following four lines for the Victoria Times Colonist
|
||||
## title = u'Victoria Times Colonist'
|
||||
## url_prefix = 'http://www.timescolonist.com'
|
||||
## description = u'News from Victoria, BC'
|
||||
## fp_tag = 'CAN_TC'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Province
|
||||
## title = u'Vancouver Province'
|
||||
## url_prefix = 'http://www.theprovince.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VP'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Sun
|
||||
## title = u'Vancouver Sun'
|
||||
## url_prefix = 'http://www.vancouversun.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following four lines for the Edmonton Journal
|
||||
## title = u'Edmonton Journal'
|
||||
## url_prefix = 'http://www.edmontonjournal.com'
|
||||
## description = u'News from Edmonton, AB'
|
||||
## fp_tag = 'CAN_EJ'
|
||||
|
||||
# un-comment the following four lines for the Calgary Herald
|
||||
title = u'Calgary Herald'
|
||||
url_prefix = 'http://www.calgaryherald.com'
|
||||
description = u'News from Calgary, AB'
|
||||
fp_tag = 'CAN_CH'
|
||||
|
||||
# un-comment the following three lines for the Regina Leader-Post
|
||||
#title = u'Regina Leader-Post'
|
||||
#url_prefix = 'http://www.leaderpost.com'
|
||||
#description = u'News from Regina, SK'
|
||||
# un-comment the following four lines for the Regina Leader-Post
|
||||
## title = u'Regina Leader-Post'
|
||||
## url_prefix = 'http://www.leaderpost.com'
|
||||
## description = u'News from Regina, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Saskatoon Star-Phoenix
|
||||
#title = u'Saskatoon Star-Phoenix'
|
||||
#url_prefix = 'http://www.thestarphoenix.com'
|
||||
#description = u'News from Saskatoon, SK'
|
||||
# un-comment the following four lines for the Saskatoon Star-Phoenix
|
||||
## title = u'Saskatoon Star-Phoenix'
|
||||
## url_prefix = 'http://www.thestarphoenix.com'
|
||||
## description = u'News from Saskatoon, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Windsor Star
|
||||
#title = u'Windsor Star'
|
||||
#url_prefix = 'http://www.windsorstar.com'
|
||||
#description = u'News from Windsor, ON'
|
||||
# un-comment the following four lines for the Windsor Star
|
||||
## title = u'Windsor Star'
|
||||
## url_prefix = 'http://www.windsorstar.com'
|
||||
## description = u'News from Windsor, ON'
|
||||
## fp_tag = 'CAN_'
|
||||
|
||||
# un-comment the following three lines for the Ottawa Citizen
|
||||
#title = u'Ottawa Citizen'
|
||||
#url_prefix = 'http://www.ottawacitizen.com'
|
||||
#description = u'News from Ottawa, ON'
|
||||
# un-comment the following four lines for the Ottawa Citizen
|
||||
## title = u'Ottawa Citizen'
|
||||
## url_prefix = 'http://www.ottawacitizen.com'
|
||||
## description = u'News from Ottawa, ON'
|
||||
## fp_tag = 'CAN_OC'
|
||||
|
||||
# un-comment the following three lines for the Montreal Gazette
|
||||
#title = u'Montreal Gazette'
|
||||
#url_prefix = 'http://www.montrealgazette.com'
|
||||
#description = u'News from Montreal, QC'
|
||||
# un-comment the following four lines for the Montreal Gazette
|
||||
## title = u'Montreal Gazette'
|
||||
## url_prefix = 'http://www.montrealgazette.com'
|
||||
## description = u'News from Montreal, QC'
|
||||
## fp_tag = 'CAN_MG'
|
||||
|
||||
|
||||
language = 'en_CA'
|
||||
__author__ = 'Nick Redding'
|
||||
encoding = 'latin1'
|
||||
no_stylesheets = True
|
||||
timefmt = ' [%b %d]'
|
||||
extra_css = '''
|
||||
@ -64,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
||||
|
||||
def preprocess_html(self,soup):
|
||||
#delete iempty id attributes--they screw up the TOC for unknow reasons
|
||||
divtags = soup.findAll('div',attrs={'id':''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del(div['id'])
|
||||
def get_cover_url(self):
    """Return the URL of the most recent available front-page image.

    The URL is built from the day of the month and ``self.fp_tag``.  Today's
    image is tried first, then each of the previous six days.  Returns None
    when ``fp_tag`` is empty or when none of the seven candidates can be
    opened (in which case "Cover unavailable" is logged).
    """
    from datetime import timedelta, date
    if self.fp_tag == '':
        return None
    # Read the date once so every attempt is relative to the same day.
    today = date.today()
    for daysback in range(7):
        day = (today - timedelta(days=daysback)).day
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except Exception:
            # Any fetch failure just means "try the previous day"; unlike a
            # bare except this lets KeyboardInterrupt/SystemExit propagate.
            continue
        return cover
    self.log("\nCover unavailable")
    return None
|
||||
|
||||
def fixChars(self,string):
    """Replace Windows-1252 punctuation control codes with real punctuation.

    The substitutions are applied in order and the resulting string is
    returned.
    """
    substitutions = (
        ("\x91","‘"),  # lsquo
        ("\x92","’"),  # rsquo
        ("\x93","“"),  # ldquo
        ("\x94","”"),  # rdquo
        ("\x96","–"),  # ndash
        ("\x97","—"),  # mdash
        # NOTE(review): pattern and replacement are identical as shown here;
        # possibly an HTML entity lost in extraction -- confirm upstream.
        ("’","’"),
    )
    fixed = string
    for pattern, replacement in substitutions:
        fixed = re.sub(pattern, replacement, fixed)
    return fixed
|
||||
|
||||
def massageNCXText(self, description):
    """Clean a description for use in the Kindle table of contents.

    A falsy description is returned unchanged.  Otherwise HTML entities are
    converted through BeautifulStoneSoup and the text is passed through
    ``self.fixChars``.
    """
    # Kindle TOC descriptions won't render certain characters
    if not description:
        return description
    decoded = BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    massaged = unicode(decoded)
    # NOTE(review): pattern and replacement are identical as shown here;
    # possibly an ampersand entity lost in extraction -- confirm upstream.
    massaged = re.sub("&","&", massaged)
    return self.fixChars(massaged)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Attach a TOC thumbnail and a fallback summary to ``article``.

    On the first page, the first ``img`` under ``body`` (if any) is registered
    as the TOC thumbnail after removing a ``links\\linkN\\`` prefix from its
    ``src``.  When the article's text summary is blank, the content of the
    ``og:description`` meta tag (if present) is used for both summary fields.
    """
    if first:
        lead_image = soup.find('body').find('img')
        if lead_image is not None:
            thumb_src = re.sub(r'links\\link\d+\\','',lead_image['src'])
            self.add_toc_thumbnail(article, thumb_src)
    if article.text_summary.strip():
        # A non-blank summary already exists; leave it alone.
        return
    meta = soup.find('meta',attrs={'property':'og:description'})
    if meta is None:
        return
    article.summary = article.text_summary = meta['content']
|
||||
|
||||
def strip_anchors(self,soup):
    """Replace text-only links with their rendered contents.

    Every ``a`` tag that does not contain an ``img`` is replaced by its
    rendered contents decoded as cp1252; links wrapping an image are kept.
    The soup is modified in place and returned.
    """
    for element in soup.findAll(True):
        for anchor in element.findAll('a'):
            if anchor.img is not None:
                # Image links stay as they are.
                continue
            inner_text = anchor.renderContents().decode('cp1252','replace')
            anchor.replaceWith(inner_text)
    return soup
|
||||
|
||||
def preprocess_html(self, soup):
    """Return ``soup`` after passing it through ``self.strip_anchors``."""
    stripped = self.strip_anchors(soup)
    return stripped
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
|
||||
@ -98,8 +196,6 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
atag = h1tag.find('a',href=True)
|
||||
if not atag:
|
||||
continue
|
||||
url = atag['href']
|
||||
if not url.startswith('http:'):
|
||||
url = self.url_prefix+'/news/todays-paper/'+atag['href']
|
||||
#self.log("Section %s" % key)
|
||||
#self.log("url %s" % url)
|
||||
|
11
recipes/catholic_daily_readings.recipe
Normal file
@ -0,0 +1,11 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1328971305(BasicNewsRecipe):
    """Recipe that gathers Catholic daily readings from four feeds."""

    title = u'Catholic Daily Readings'
    __author__ = 'adoucette'
    language = 'en'

    # Article selection limits and clean-up mode.
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Daily Readings - USCCB', u'http://www.usccb.org/bible/readings/rss/'),
        (u'Daily Reflection - One Bread One Body', u'http://www.presentationministries.com/general/rss.asp'),
        (u'Mass Readings - Universalis', u'http://www.universalis.com/atommass3.xml'),
        (u'Saint Of The Day - CNA', u'http://feeds.feedburner.com/catholicnewsagency/saintoftheday'),
    ]
|
@ -1,16 +1,20 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class CD_Action(BasicNewsRecipe):
|
||||
title = u'CD-Action'
|
||||
__author__ = 'fenuks'
|
||||
description = 'cdaction.pl - polish magazine about games site'
|
||||
description = 'cdaction.pl - polish games magazine site'
|
||||
category = 'games'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
cover_url =u'http://s.cdaction.pl/obrazki/logo-CD-Action_172k9.JPG'
|
||||
keep_only_tags= dict(id='news_content')
|
||||
remove_tags_after= dict(name='div', attrs={'class':'tresc'})
|
||||
feeds = [(u'Newsy', u'http://www.cdaction.pl/rss_newsy.xml')]
|
||||
|
||||
|
||||
def get_cover_url(self):
    """Return the cover URL scraped from the magazine page.

    The link is taken from the first ``a`` inside the first ``div`` of the
    element with id ``wspolnik``.  When that element, its ``div`` or the
    link's ``href`` is missing, ``self.cover_url`` is left untouched and the
    existing value (or None if there is none) is returned instead of raising.
    """
    soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
    box = soup.find(id='wspolnik')
    link = None
    if box is not None and box.div is not None:
        link = box.div.a
    if link is not None and link.get('href'):
        self.cover_url = 'http://www.cdaction.pl' + link['href']
    return getattr(self, 'cover_url', None)
|
@ -5,6 +5,7 @@ class CGM(BasicNewsRecipe):
|
||||
oldest_article = 7
|
||||
__author__ = 'fenuks'
|
||||
description = u'Codzienna Gazeta Muzyczna'
|
||||
masthead_url='http://www.cgm.pl/img/header/logo.gif'
|
||||
cover_url = 'http://www.krafcy.com/foto/tinymce/Image/cgm%281%29.jpg'
|
||||
category = 'music'
|
||||
language = 'pl'
|
||||
@ -23,21 +24,19 @@ class CGM(BasicNewsRecipe):
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
gallery=soup.find('div', attrs={'class':'galleryFlash'})
|
||||
if gallery:
|
||||
img=gallery.div
|
||||
gallery.img.extract()
|
||||
if img:
|
||||
img=img['style']
|
||||
img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
|
||||
gallery.contents[1].name='img'
|
||||
gallery.contents[1]['src']=img
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
ad=soup.findAll('a')
|
||||
for r in ad:
|
||||
if 'http://www.hustla.pl' in r['href'] or 'http://www.ebilet.pl' in r['href']:
|
||||
if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']:
|
||||
r.extract()
|
||||
gallery=soup.find('div', attrs={'class':'galleryFlash'})
|
||||
if gallery:
|
||||
img=gallery.find('embed')
|
||||
if img:
|
||||
img=img['src'][35:]
|
||||
img='http://www.cgm.pl/_vault/_gallery/_photo/'+img
|
||||
param=gallery.findAll(name='param')
|
||||
for i in param:
|
||||
i.extract()
|
||||
gallery.contents[1].name='img'
|
||||
gallery.contents[1]['src']=img
|
||||
return soup
|
@ -3,6 +3,7 @@ __license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import urllib, re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ChicagoTribune(BasicNewsRecipe):
|
||||
@ -77,8 +78,25 @@ class ChicagoTribune(BasicNewsRecipe):
|
||||
|
||||
|
||||
def get_article_url(self, article):
|
||||
print article.get('feedburner_origlink', article.get('guid', article.get('link')))
|
||||
return article.get('feedburner_origlink', article.get('guid', article.get('link')))
|
||||
ans = None
|
||||
try:
|
||||
s = article.summary
|
||||
ans = urllib.unquote(
|
||||
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
|
||||
except:
|
||||
pass
|
||||
if ans is None:
|
||||
ans = article.get('feedburner_origlink', article.get('guid', article.get('link')))
|
||||
if ans is not None:
|
||||
return ans.replace('?track=rss', '')
|
||||
|
||||
def skip_ad_pages(self, soup):
    """Follow the "continue to article" link on an interstitial page.

    Looks for the text node 'click here to continue to article'; when its
    parent carries an ``href``, the raw content of that URL is fetched and
    returned.  Returns None in every other case.
    """
    marker = soup.find(text='click here to continue to article')
    if not marker:
        return None
    target = marker.parent.get('href')
    if not target:
        return None
    return self.index_to_soup(target, raw=True)
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
# Remove the navigation bar. It was kept until now to be able to follow
|
||||
|
@ -33,6 +33,32 @@ class ChristianScienceMonitor(BasicNewsRecipe):
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
requires_version = (0, 8, 39)
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
    """Re-parse raw HTML with html5lib and strip non-content elements.

    The document is parsed into an lxml tree; script, style, noscript, meta,
    link and object elements and all comments are removed.  The tree is then
    serialised to unicode and everything before the last ``<html`` is
    dropped.  Any exception has its traceback printed and is re-raised.
    """
    try:
        from html5lib import parse
        tree = parse(raw, namespaceHTMLElements=False,
                treebuilder='lxml').getroot()
        from lxml import etree
        unwanted = '//script|//style|//noscript|//meta|//link|//object'
        for node in tree.xpath(unwanted):
            node.getparent().remove(node)
        # Materialise the comment list first: the tree is mutated below.
        comments = list(tree.iterdescendants(tag=etree.Comment))
        for comment in comments:
            comment.getparent().remove(comment)
        serialized = etree.tostring(tree, encoding=unicode)
        return re.sub('.*<html', '<html', serialized, flags=re.DOTALL)
    except:
        import traceback
        traceback.print_exc()
        raise
|
||||
|
||||
def index_to_soup(self, url):
    """Fetch ``url``, clean it with ``preprocess_raw_html`` and parse it.

    The raw bytes from the base-class fetch are decoded as UTF-8, cleaned,
    and handed back to the base-class ``index_to_soup`` for parsing.
    """
    fetched = BasicNewsRecipe.index_to_soup(self, url, raw=True)
    cleaned = self.preprocess_raw_html(fetched.decode('utf-8'), url)
    return BasicNewsRecipe.index_to_soup(self, cleaned)
|
||||
|
||||
def append_page(self, soup, appendtag, position):
|
||||
nav = soup.find('div',attrs={'class':'navigation'})
|
||||
@ -78,14 +104,6 @@ class ChristianScienceMonitor(BasicNewsRecipe):
|
||||
print_soup = soup
|
||||
return print_soup
|
||||
|
||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||
[
|
||||
(r'<!--.*?-->', lambda match : ''),
|
||||
(r'<body.*?<div id="story"', lambda match : '<body><div id="story"'),
|
||||
(r'<div class="pubdate">.*?</div>', lambda m: ''),
|
||||
(r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
|
||||
lambda match : '</body>'),
|
||||
]]
|
||||
extra_css = '''
|
||||
h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
|
||||
.sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}
|
||||
|
48
recipes/ciekawostki_historyczne.recipe
Normal file
@ -0,0 +1,48 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
class Ciekawostki_Historyczne(BasicNewsRecipe):
    """Recipe for ciekawostkihistoryczne.pl, including multi-page articles."""

    title = u'Ciekawostki Historyczne'
    oldest_article = 7
    __author__ = 'fenuks'
    description = u'Serwis popularnonaukowy - odkrycia, kontrowersje, historia, ciekawostki, badania, ciekawostki z przeszłości.'
    category = 'history'
    language = 'pl'
    masthead_url= 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
    cover_url='http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
    max_articles_per_feed = 100
    # Strip the "this article has several pages" block and the "see also"
    # list.  The patterns contain no backslashes, so plain u'' literals are
    # equivalent to the former ur'' ones.
    preprocess_regexps = [
        (re.compile(u'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL), lambda match: ''),
        (re.compile(u'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: ''),
    ]
    no_stylesheets=True
    remove_empty_feeds=True
    keep_only_tags=[dict(name='div', attrs={'class':'post'})]
    remove_tags=[dict(id='singlepostinfo')]
    feeds = [
        (u'Staro\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/starozytnosc/feed/'),
        (u'\u015aredniowiecze', u'http://ciekawostkihistoryczne.pl/tag/sredniowiecze/feed/'),
        (u'Nowo\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/nowozytnosc/feed/'),
        (u'XIX wiek', u'http://ciekawostkihistoryczne.pl/tag/xix-wiek/feed/'),
        (u'1914-1939', u'http://ciekawostkihistoryczne.pl/tag/1914-1939/feed/'),
        (u'1939-1945', u'http://ciekawostkihistoryczne.pl/tag/1939-1945/feed/'),
        (u'Powojnie (od 1945)', u'http://ciekawostkihistoryczne.pl/tag/powojnie/feed/'),
        (u'Recenzje', u'http://ciekawostkihistoryczne.pl/category/recenzje/feed/'),
    ]

    def append_page(self, soup, appendtag):
        """Append the bodies of an article's follow-up pages to ``appendtag``.

        The page links are taken from the ``h7`` element, or from the ``p``
        element right after it when the ``h7`` holds no ``br``.  Each linked
        page is fetched, its ``div.post`` is cleaned of post info, right
        aligned captions, ``h1`` headings and the nodes following ``h6`` and
        ``h7``, and is then inserted at the end of ``appendtag``.  Pages or
        nodes that are missing are skipped instead of raising.
        """
        tag = soup.find(name='h7')
        if not tag:
            return
        # The sibling may be absent or a text node without a usable name.
        if not tag.br and getattr(tag.nextSibling, 'name', None) == 'p':
            tag = tag.nextSibling
        links = tag.findAll('a')
        if links:
            # Remove the pager itself once it is known to contain links.
            tag.extract()
        for link in links:
            soup2 = self.index_to_soup(link['href'])
            pagetext = soup2.find(name='div', attrs={'class':'post'})
            if pagetext is None:
                continue
            for r in pagetext.findAll('div', attrs={'id':'singlepostinfo'}):
                r.extract()
            for r in pagetext.findAll('div', attrs={'class':'wp-caption alignright'}):
                r.extract()
            for r in pagetext.findAll('h1'):
                r.extract()
            for heading in ('h6', 'h7'):
                marker = pagetext.find(heading)
                follower = marker.nextSibling if marker is not None else None
                if follower is not None:
                    follower.extract()
            appendtag.insert(len(appendtag.contents), pagetext)

    def preprocess_html(self, soup):
        """Merge follow-up pages into the article body and return the soup."""
        self.append_page(soup, soup.body)
        return soup
|
||||
|
||||
|
@ -23,7 +23,9 @@ class TheCND(BasicNewsRecipe):
|
||||
remove_tags = [dict(name='table', attrs={'align':'right'}), dict(name='img', attrs={'src':'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
|
||||
no_stylesheets = True
|
||||
|
||||
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
||||
preprocess_regexps = [ (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
|
||||
(re.compile('<table width.*?</table>', re.DOTALL), lambda m: ''),
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
if url.find('news/article.php') >= 0:
|
||||
@ -46,13 +48,15 @@ class TheCND(BasicNewsRecipe):
|
||||
title = self.tag_to_string(a)
|
||||
self.log('\tFound article: ', title, 'at', url)
|
||||
date = a.nextSibling
|
||||
if re.search('cm', date):
|
||||
continue
|
||||
if (date is not None) and len(date)>2:
|
||||
if not articles.has_key(date):
|
||||
articles[date] = []
|
||||
articles[date].append({'title':title, 'url':url, 'description': '', 'date':''})
|
||||
self.log('\t\tAppend to : ', date)
|
||||
|
||||
self.log('log articles', articles)
|
||||
#self.log('log articles', articles)
|
||||
mostCurrent = sorted(articles).pop()
|
||||
self.title = 'CND ' + mostCurrent
|
||||
|
||||
|
72
recipes/cnd_weekly.recipe
Normal file
@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Derek Liang <Derek.liang.ca @@@at@@@ gmail.com>'
|
||||
'''
|
||||
cnd.org
|
||||
'''
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TheCND(BasicNewsRecipe):
    """Recipe for the weekly ("cm") issues listed on the cnd.org front page."""

    title = 'CND Weekly'
    __author__ = 'Derek Liang'
    description = ''
    INDEX = 'http://cnd.org'
    language = 'zh'
    conversion_options = {'linearize_tables':True}

    remove_tags_before = dict(name='div', id='articleHead')
    remove_tags_after = dict(id='copyright')
    remove_tags = [dict(name='table', attrs={'align':'right'}), dict(name='img', attrs={'src':'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
    no_stylesheets = True

    preprocess_regexps = [ (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
                           (re.compile('<table width.*?</table>', re.DOTALL), lambda m: ''),
                           ]

    def print_version(self, url):
        """Map an article URL to its print page, keeping the ``=id`` suffix."""
        if url.find('news/article.php') >= 0:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/news/print.php?storyid", url)
        else:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/wfsection/print.php?articleid", url)

    def parse_index(self):
        """Group front-page article links by the issue label that follows them.

        Only links with ``target="_cnd"`` pointing at ``article.php`` whose
        following sibling contains 'cm' are kept.  Returns one feed per
        label, in descending label order; ``self.title`` is updated to
        'CND <label>' for each feed as it is added.
        """
        soup = self.index_to_soup(self.INDEX)

        feeds = []
        articles = {}

        for a in soup.findAll('a', attrs={'target':'_cnd'}):
            url = a['href']
            if url.find('article.php') < 0 :
                continue
            if url.startswith('/'):
                url = 'http://cnd.org'+url
            title = self.tag_to_string(a)
            date = a.nextSibling
            # A link without a following sibling has no label; check this
            # before re.search, which would raise TypeError on None.
            if date is None or not re.search('cm', date):
                continue
            self.log('\tFound article: ', title, 'at', url, '@', date)
            if len(date)>2:
                articles.setdefault(date, []).append({'title':title, 'url':url, 'description': '', 'date':''})
                self.log('\t\tAppend to : ', date)

        for mostCurrent in reversed(sorted(articles)):
            self.title = 'CND ' + mostCurrent
            feeds.append((self.title, articles[mostCurrent]))

        return feeds

    def populate_article_metadata(self, article, soup, first):
        """Log the text of the article's first ``h3`` element."""
        header = soup.find('h3')
        self.log('header: ' + self.tag_to_string(header))
|
||||
|
@ -1,38 +1,89 @@
|
||||
#!/usr/bin/env python
|
||||
##
|
||||
## Title: Common Dreams
|
||||
##
|
||||
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
|
||||
# Feb 2012: Cleaned up the output to have only the main article
|
||||
|
||||
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
|
||||
'''
|
||||
commondreams.org
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CommonDreams(BasicNewsRecipe):
|
||||
# Identify the recipe
|
||||
|
||||
title = u'Common Dreams'
|
||||
description = u'Progressive news and views'
|
||||
description = u'Breaking News & Views for the Progressive Community.'
|
||||
cover_url = 'https://s3.amazonaws.com/s3.commondreams.org/images/common-dreams.png'
|
||||
__author__ = u'XanthanGum'
|
||||
language = 'en'
|
||||
|
||||
# Format the text
|
||||
|
||||
extra_css = '''
|
||||
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
|
||||
h1{font-size: xx-large;}
|
||||
h2{font-size: large;}
|
||||
'''
|
||||
|
||||
# Pick no article older than seven days and limit the number of articles per feed to 100
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
|
||||
# Remove everything before the article
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
remove_tags_before = dict(name = 'div', attrs = {'id':'node-header'})
|
||||
# Flattens all the tables to make it compatible with Nook
|
||||
conversion_options = {'linearize_tables' : True}
|
||||
|
||||
# Remove everything after the article
|
||||
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
||||
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
|
||||
|
||||
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
||||
.introduction, .first { font-weight: bold; } \
|
||||
.cross-head { font-weight: bold; font-size: 125%; } \
|
||||
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
|
||||
.cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
|
||||
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
|
||||
.correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
|
||||
font-size: 80%; font-style: italic; margin: 1px auto; } \
|
||||
.story-date, .published { font-size: 80%; } \
|
||||
table { width: 100%; } \
|
||||
td img { display: block; margin: 5px auto; } \
|
||||
ul { padding-top: 10px; } \
|
||||
ol { padding-top: 10px; } \
|
||||
li { padding-top: 5px; padding-bottom: 5px; } \
|
||||
h1 { font-size: 175%; font-weight: bold; } \
|
||||
h2 { font-size: 150%; font-weight: bold; } \
|
||||
h3 { font-size: 125%; font-weight: bold; } \
|
||||
h4, h5, h6 { font-size: 100%; font-weight: bold; }'
|
||||
|
||||
# Remove the line breaks and float left/right and picture width/height.
|
||||
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'float:.*?'), lambda m: ''),
|
||||
(re.compile(r'width:.*?px'), lambda m: ''),
|
||||
(re.compile(r'height:.*?px'), lambda m: ''),
|
||||
(re.compile(r'<a.*?>'), lambda m: ''),
|
||||
(re.compile(r'</a>'), lambda m: ''),
|
||||
]
|
||||
|
||||
|
||||
# Main article is inside this tag
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':lambda x: x and 'node-' in x}),
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'node-links clear-block'}), # remove Share options
|
||||
]
|
||||
|
||||
remove_tags_after = dict(name = 'div', attrs = {'class':'copyright-info'})
|
||||
|
||||
# Identify the news feeds
|
||||
|
||||
feeds = [(u'Headlines', u'http://www.commondreams.org/feed/headlines_rss'),
|
||||
(u'Further News Articles', u'http://www.commondreams.org/feed/further_rss'),
|
||||
(u'Views', u'http://www.commondreams.org/feed/views_rss'),
|
||||
(u'Progressive Newswire', u'http://www.commondreams.org/feed/newswire_rss')]
|
||||
feeds = [(u'Headlines', u'https://www.commondreams.org/feed/headlines_rss'),
|
||||
(u'Further News Articles', u'https://www.commondreams.org/feed/further_rss'),
|
||||
(u'Views', u'https://www.commondreams.org/feed/views_rss'),
|
||||
(u'Progressive Newswire', u'https://www.commondreams.org/feed/newswire_rss')]
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
url = url + '?print'
|
||||
return url
|
23
recipes/computerworld_pl.recipe
Normal file
@ -0,0 +1,23 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Computerworld_pl(BasicNewsRecipe):
    """Recipe for the Computerworld.pl news feed."""

    title = u'Computerworld.pl'
    __author__ = 'fenuks'
    description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
    category = 'IT'
    language = 'pl'
    masthead_url= 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif'
    no_stylesheets=True
    oldest_article = 7
    max_articles_per_feed = 100
    keep_only_tags=[dict(attrs={'class':['tyt_news', 'prawo', 'autor', 'tresc']})]
    remove_tags_after=dict(name='div', attrs={'class':'rMobi'})
    remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
    feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]

    def get_cover_url(self):
        """Return the cover image URL scraped from the site's front page.

        The ``src`` of the first ``img`` with class ``prawo`` is stored in
        ``self.cover_url``.  When no such image exists, ``self.cover_url`` is
        left untouched and its current value (or None) is returned instead
        of raising.
        """
        soup = self.index_to_soup('http://www.computerworld.pl/')
        cover = soup.find(name='img', attrs={'class':'prawo'})
        if cover is not None:
            self.cover_url = cover['src']
        return getattr(self, 'cover_url', None)
|
71
recipes/consortium_news.recipe
Normal file
@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python
|
||||
##
|
||||
## Title: Consortium News
|
||||
##
|
||||
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
|
||||
# Feb 2012: Initial release
|
||||
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
|
||||
'''
|
||||
consortiumnews.com
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ConsortiumNews(BasicNewsRecipe):
|
||||
|
||||
title = u'Consortium News'
|
||||
publisher = 'Copyright © 2012 Consortiumnews. All Rights Reserved.'
|
||||
language = 'en'
|
||||
__author__ = 'kiavash'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
conversion_options = {'linearize_tables' : True} # Flattens all the tables to make it compatible with Nook
|
||||
|
||||
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
||||
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
|
||||
|
||||
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
||||
.introduction, .first { font-weight: bold; } \
|
||||
.cross-head { font-weight: bold; font-size: 125%; } \
|
||||
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
|
||||
.cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
|
||||
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
|
||||
.correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
|
||||
font-size: 80%; font-style: italic; margin: 1px auto; } \
|
||||
.story-date, .published { font-size: 80%; } \
|
||||
table { width: 100%; } \
|
||||
td img { display: block; margin: 5px auto; } \
|
||||
ul { padding-top: 10px; } \
|
||||
ol { padding-top: 10px; } \
|
||||
li { padding-top: 5px; padding-bottom: 5px; } \
|
||||
h1 { font-size: 175%; font-weight: bold; } \
|
||||
h2 { font-size: 150%; font-weight: bold; } \
|
||||
h3 { font-size: 125%; font-weight: bold; } \
|
||||
h4, h5, h6 { font-size: 100%; font-weight: bold; }'
|
||||
|
||||
# Remove the line breaks and float left/right and picture width/height.
|
||||
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'float:.*?'), lambda m: ''),
|
||||
(re.compile(r'width:.*?px'), lambda m: ''),
|
||||
(re.compile(r'height:.*?px'), lambda m: ''),
|
||||
(re.compile(r'<a.*?>'), lambda h1: ''),
|
||||
(re.compile(r'</a>'), lambda h2: ''),
|
||||
]
|
||||
|
||||
# Main article is inside this tag
|
||||
keep_only_tags = [dict(name='div', attrs={'id':lambda x: x and 'post-' in x})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'sociable'}), # remove 'Share this Article'
|
||||
dict(name='p', attrs={'class':'tags'}), # remove 'Tags: ... '
|
||||
]
|
||||
|
||||
feeds = [(u'Consortium News', u'http://feeds.feedburner.com/Consortiumnewscom')]
|
@ -7,6 +7,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
|
||||
|
||||
__author__ = 'Dave Asbury'
|
||||
#last update 21/12/11
|
||||
# greyscale code by Starson
|
||||
cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
|
||||
no_stylesheets = True
|
||||
@ -31,7 +32,8 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
dict(name='div',attrs={'class' : ['blogInfo','viral_toolbar','comment_number','prevEntry nav']}),
|
||||
dict(name='div',attrs={'class' : 'blog_module_about_the_authors'}),
|
||||
dict(attrs={'id': ['breadcrumbs','comment','related_links_list','right_rail','content_sec_fb_more','content_sec_mostpopularstories','content-sec_fb_frame_viewfb_bot']}),
|
||||
dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']})
|
||||
dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']}),
|
||||
dict(name='li',attrs={'class' : 'thumb'})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
@ -48,4 +50,3 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
img.type = "GrayscaleType"
|
||||
img.save(iurl)
|
||||
return soup
|
||||
|
||||
|
25
recipes/countryfile.recipe
Normal file
@ -0,0 +1,25 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||
title = u'Countryfile.com'
|
||||
cover_url = 'http://www.buysubscriptions.com/static_content/the-immediate/en/images/covers/CFIL_maxi.jpg'
|
||||
__author__ = 'Dave Asbury'
|
||||
description = 'The official website of Countryfile Magazine'
|
||||
# last updated 29/1/12
|
||||
language = 'en_GB'
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 25
|
||||
remove_empty_feeds = True
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
#articles_are_obfuscated = True
|
||||
|
||||
remove_tags = [
|
||||
# dict(attrs={'class' : ['player']}),
|
||||
|
||||
]
|
||||
feeds = [
|
||||
(u'Homepage', u'http://www.countryfile.com/rss/home'),
|
||||
(u'Country News', u'http://www.countryfile.com/rss/news'),
|
||||
(u'Countryside', u'http://www.countryfile.com/rss/countryside'),
|
||||
]
|
@ -5,7 +5,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
description = 'News as provide by The Daily Mirror -UK'
|
||||
|
||||
__author__ = 'Dave Asbury'
|
||||
# last updated 30/10/11
|
||||
# last updated 11/2/12
|
||||
language = 'en_GB'
|
||||
|
||||
cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
|
||||
@ -13,45 +13,65 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
|
||||
|
||||
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 30
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 5
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
extra_css = '''
|
||||
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
||||
'''
|
||||
auto_cleanup = True
|
||||
#conversion_options = { 'linearize_tables' : True }
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div',attrs={'id' : 'body-content'})
|
||||
]
|
||||
|
||||
remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]
|
||||
#keep_only_tags = [
|
||||
# dict(name='h1'),
|
||||
# dict(name='div',attrs={'id' : 'body-content'}),
|
||||
#dict(name='div',atts={'class' : 'article-body'}),
|
||||
#dict(attrs={'class' : ['article-attr','byline append-1','published']}),
|
||||
#dict(name='p'),
|
||||
# ]
|
||||
|
||||
#remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
|
||||
dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
|
||||
dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
|
||||
dict(name='div',attrs={'class' : 'span-12 last sl-others addthis_toolbox addthis_default_style'})
|
||||
dict(name='title'),
|
||||
dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),
|
||||
# dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
|
||||
#dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
|
||||
#dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
|
||||
]
|
||||
|
||||
# preprocess_regexps = [
|
||||
#(re.compile(r'<dl class="q-search">.*?</dl>', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<dl class="q-search">.*?</dl>', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
(re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'Advertisement >>', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
|
||||
#preprocess_regexps = [
|
||||
#(re.compile(r'Sponsored Links', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
|
||||
feeds = [
|
||||
|
||||
(u'News', u'http://www.mirror.co.uk/news/rss.xml')
|
||||
,(u'Tech News', u'http://www.mirror.co.uk/news/technology/rss.xml')
|
||||
,(u'Weird World','http://www.mirror.co.uk/news/weird-world/rss.xml')
|
||||
,(u'Film Gossip','http://www.mirror.co.uk/celebs/film/rss.xml')
|
||||
,(u'Music News','http://www.mirror.co.uk/celebs/music/rss.xml')
|
||||
,(u'Celebs and Tv Gossip','http://www.mirror.co.uk/celebs/tv/rss.xml')
|
||||
,(u'Sport','http://www.mirror.co.uk/sport/rss.xml')
|
||||
,(u'Life Style','http://www.mirror.co.uk/life-style/rss.xml')
|
||||
,(u'Advice','http://www.mirror.co.uk/advice/rss.xml')
|
||||
,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
|
||||
(u'UK News', u'http://feed43.com/0287771688643868.xml')
|
||||
,(u'Tech News', u'http://feed43.com/2455520588350501.xml')
|
||||
,(u'Weird World','http://feed43.com/0863800333634654.xml')
|
||||
,(u'Sport','http://feed43.com/7713243036546130.xml')
|
||||
,(u'Sport : Boxing ','http://feed43.com/0414732220804255.xml')
|
||||
,(u'Sport : Rugby Union','http://feed43.com/4710138762362383.xml')
|
||||
,(u'Sport : Other','http://feed43.com/4501416886323415.xml')
|
||||
,(u'TV and Film','http://feed43.com/5238302853765104.xml')
|
||||
,(u'Celebs','http://feed43.com/8770061048844683.xml')
|
||||
,(u'Life Style : Family','http://feed43.com/4356170742410338.xml')
|
||||
,(u'Travel','http://feed43.com/1436576006476607.xml')
|
||||
|
||||
|
||||
|
||||
# example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
|
||||
|
||||
]
|
||||
extra_css = '''
|
||||
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
||||
h1{ font-size:18px;}
|
||||
img { display:block}
|
||||
'''
|
||||
|
||||
|
15
recipes/datasport.recipe
Normal file
@ -0,0 +1,15 @@
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'faber1971'
|
||||
description = 'Italian soccer news website - v1.00 (17, December 2011)'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1324114272(BasicNewsRecipe):
|
||||
title = u'Datasport'
|
||||
language = 'it'
|
||||
__author__ = 'faber1971'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Datasport', u'http://www.datasport.it/calcio/rss.xml')]
|
11
recipes/derin_dusunce.recipe
Normal file
@ -0,0 +1,11 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1324913694(BasicNewsRecipe):
|
||||
title = u'Derin Dusunce'
|
||||
language = 'tr'
|
||||
__author__ = 'asalet_r'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Derin D\xfc\u015f\xfcnce', u'http://www.derindusunce.org/feed/')]
|
27
recipes/descopera_org.recipe
Normal file
@ -0,0 +1,27 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''
|
||||
descopera.org
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Descopera(BasicNewsRecipe):
|
||||
title = u'Descoperă.org'
|
||||
__author__ = 'Marius Ignătescu'
|
||||
description = 'Descoperă. Placerea de a cunoaște'
|
||||
publisher = 'descopera.org'
|
||||
category = 'science, technology, culture, history, earth'
|
||||
language = 'ro'
|
||||
oldest_article = 14
|
||||
max_articles_per_feed = 100
|
||||
encoding = 'utf8'
|
||||
no_stylesheets = True
|
||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['post']})]
|
||||
remove_tags = [dict(name='div', attrs={'class':['topnav', 'box_a', 'shr-bookmarks shr-bookmarks-expand shr-bookmarks-center shr-bookmarks-bg-knowledge']})]
|
||||
remove_attributes = ['width','height']
|
||||
cover_url = 'http://www.descopera.org/wp-content/themes/dorg/styles/default/img/b_top.png?width=400'
|
||||
feeds = [(u'Articles', u'http://www.descopera.org/feed/')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
21
recipes/desiring_god.recipe
Normal file
@ -0,0 +1,21 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class DesiringGodEnglish(BasicNewsRecipe):
|
||||
title = u'Desiring God'
|
||||
__author__ = 'Peter Grungi'
|
||||
language = 'en'
|
||||
|
||||
cover_url = 'http://cdn0.desiringgod.org/images/layout/breadcrumbs_dg_mark.png'
|
||||
masthead_url = 'http://cdn0.desiringgod.org/images/layout/breadcrumbs_dg_mark.png'
|
||||
language = 'en'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 50
|
||||
auto_cleanup = True
|
||||
publisher = 'Desiring God Ministries'
|
||||
author = 'Desiring God Ministries'
|
||||
|
||||
feeds = [(u'Desiring God Blog', u'http://feeds.feedburner.com/DGBlog?format=xml')]
|
@ -7,6 +7,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
||||
__licence__ ='GPL v3'
|
||||
category = 'IT'
|
||||
language = 'pl'
|
||||
masthead_url='http://static.dpcdn.pl/css/Black/Images/header_logo_napis_fullVersion.png'
|
||||
cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
|
||||
description = u'Aktualności i blogi z dobreprogramy.pl'
|
||||
encoding = 'utf-8'
|
||||
@ -16,7 +17,8 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
|
||||
remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['mainBar', 'newsContent', 'postTitle title', 'postInfo', 'contentText', 'content']})]
|
||||
keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
|
||||
remove_tags = [dict(name='div', attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']})]
|
||||
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
|
||||
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
|
||||
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
|
||||
|
12
recipes/dunya_bizim.recipe
Normal file
@ -0,0 +1,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1324736687(BasicNewsRecipe):
|
||||
title = u'D\xfcnya Bizim'
|
||||
language = 'tr'
|
||||
__author__ = 'asalet_r'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 10
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Aktif \u0130mamlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=31'), (u'Ayr\u0131nt\u0131 Defteri', u'http://dunyabizim.com/servisler/rss.php?kategoriID=58'), (u'Baba Kitaplar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=4'), (u'Bu da Oldu', u'http://dunyabizim.com/servisler/rss.php?kategoriID=32'), (u'\xc7-al\u0131nt\u0131 Yaz\u0131lar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=33'), (u'Dar\xfclmedya', u'http://dunyabizim.com/servisler/rss.php?kategoriID=49'), (u'Gidenler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=59'), (u'G\xfczel Mekanlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=43'), (u'\u0130yi Haberler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=18'), (u'\u0130yi M\xfczikler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=2'), (u'Kalite Dergiler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=3'), (u'Konu\u015fa Konu\u015fa', u'http://dunyabizim.com/servisler/rss.php?kategoriID=24'), (u'M\xfcstesta G\xfczeller', u'http://dunyabizim.com/servisler/rss.php?kategoriID=65'), (u'O \u015eimdi Nerede?', u'http://dunyabizim.com/servisler/rss.php?kategoriID=52'), (u'Olsa Ke\u015fke', u'http://dunyabizim.com/servisler/rss.php?kategoriID=34'), (u'Orada Ne Oldu?', u'http://dunyabizim.com/servisler/rss.php?kategoriID=38'), (u'\xd6nemli Adamlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=1'), (u'Polemik', u'http://dunyabizim.com/servisler/rss.php?kategoriID=39'), (u'Sinema', u'http://dunyabizim.com/servisler/rss.php?kategoriID=23'), (u'Yalan Haber', u'http://dunyabizim.com/servisler/rss.php?kategoriID=40'), (u'Yeni \u015eeyler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=57'), (u'Zekeriya Sofras\u0131', u'http://dunyabizim.com/servisler/rss.php?kategoriID=60')]
|
12
recipes/dunya_bulteni.recipe
Normal file
@ -0,0 +1,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1321194347(BasicNewsRecipe):
|
||||
title = u'D\xfcnya B\xfclteni'
|
||||
language = 'tr'
|
||||
__author__ = 'asalet_r'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 50
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Tarih Dosyas\u0131', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=157'), (u'R\xf6portaj', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=153'), (u'Makale-Yorum', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=174'), (u'K\xfclt\xfcr-Sanat', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=66'), (u'Hayat\u0131n \u0130\xe7inden', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=200'), (u'Haber Analiz', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=123'), (u'Gezi-\u0130zlenim', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=90'), (u'Aile Sa\u011fl\u0131k E\u011fitim', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=75')]
|
67
recipes/dziennik_pl.recipe
Normal file
@ -0,0 +1,67 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
class Dziennik_pl(BasicNewsRecipe):
|
||||
title = u'Dziennik.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
|
||||
category = 'newspaper'
|
||||
language = 'pl'
|
||||
masthead_url= 'http://5.s.dziennik.pl/images/logos.png'
|
||||
cover_url= 'http://5.s.dziennik.pl/images/logos.png'
|
||||
no_stylesheets = True
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript=True
|
||||
remove_empty_feeds=True
|
||||
extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
|
||||
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: ''), (re.compile('<p><strong><a href=".*?">>>> CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
|
||||
keep_only_tags=[dict(id='article')]
|
||||
remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class':['komentarz', 'article_icon_addcommnent']})]
|
||||
feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
|
||||
(u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
|
||||
(u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
|
||||
(u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'),
|
||||
(u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'),
|
||||
(u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'),
|
||||
(u'Film', u'http://rss.dziennik.pl/Dziennik-Film'),
|
||||
(u'Muzyka' , u'http://rss.dziennik.pl/Dziennik-Muzyka'),
|
||||
(u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'),
|
||||
(u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'),
|
||||
(u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
|
||||
(u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
tag=soup.find(name='a', attrs={'title':'CZYTAJ DALEJ'})
|
||||
if tag:
|
||||
new_soup=self.index_to_soup(tag['href'], raw=True)
|
||||
return new_soup
|
||||
|
||||
def append_page(self, soup, appendtag):
|
||||
tag=soup.find('a', attrs={'class':'page_next'})
|
||||
if tag:
|
||||
appendtag.find('div', attrs={'class':'article_paginator'}).extract()
|
||||
while tag:
|
||||
soup2= self.index_to_soup(tag['href'])
|
||||
tag=soup2.find('a', attrs={'class':'page_next'})
|
||||
if not tag:
|
||||
for r in appendtag.findAll('div', attrs={'class':'art_src'}):
|
||||
r.extract()
|
||||
pagetext = soup2.find(name='div', attrs={'class':'article_body'})
|
||||
for dictionary in self.remove_tags:
|
||||
v=pagetext.findAll(name=dictionary['name'], attrs=dictionary['attrs'])
|
||||
for delete in v:
|
||||
delete.extract()
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, pagetext)
|
||||
if appendtag.find('div', attrs={'class':'article_paginator'}):
|
||||
appendtag.find('div', attrs={'class':'article_paginator'}).extract()
|
||||
|
||||
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
return soup
|
||||
|
46
recipes/echo_online.recipe
Normal file
@ -0,0 +1,46 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
|
||||
'''
|
||||
Fetch echo-online.de
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class Echo_Online(BasicNewsRecipe):
|
||||
title = u'Echo Online' # 2011-12-28 AGe
|
||||
description = '-Echo Online-'
|
||||
publisher = 'Echo Online GmbH'
|
||||
category = 'News, Germany'
|
||||
__author__ = 'Armin Geller' # 2011-12-28 AGe
|
||||
language = 'de'
|
||||
lang = 'de-DE'
|
||||
encoding = 'iso-8859-1'
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 50 # 2011-12-28 AGe
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
remove_javascript = True
|
||||
|
||||
feeds = [
|
||||
(u'Topnews', u'http://www.echo-online.de/storage/rss/rss/topnews.xml'),
|
||||
(u'Darmstadt', u'http://www.echo-online.de/rss/darmstadt.xml'),
|
||||
(u'Darmstadt-Dieburg', u'http://www.echo-online.de/rss/darmstadtdieburg.xml'),
|
||||
(u'Kreis Gro\xdf-Gerau', u'http://www.echo-online.de/rss/kreisgrossgerau.xml'),
|
||||
(u'R\xfcsselsheim', u'http://www.echo-online.de/rss/ruesselsheim.xml'),
|
||||
(u'Kreis Bergstra\xdfe', u'http://www.echo-online.de/rss/bergstrasse.xml'),
|
||||
(u'Odenwaldkreis', u'http://www.echo-online.de/rss/odenwald.xml'),
|
||||
(u'SV 98', u'http://www.echo-online.de/rss/sv98.xml'),
|
||||
(u'Kino', u'http://www.echo-online.de/rss/kino.xml'),
|
||||
(u'Ausstellungen', u'http://www.echo-online.de/rss/ausstellungen.xml'),
|
||||
(u'Ausflug & Reise', u'http://www.echo-online.de/rss/ausflugreise.xml'),
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return self.browser.open_novisit(url).geturl() + '?_FRAME=33&_FORMAT=PRINT'
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':["header", "name"]}),]
|
||||
auto_cleanup_keep = '//div[@class="bild_gross w270"]'
|
||||
|
||||
cover_url = 'http://adcounter.darmstaedter-echo.de/webdav/files/config/gui/images/Zeitungsfaecher.gif'
|
||||
|
50
recipes/edge_conversations.recipe
Normal file
@ -0,0 +1,50 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012 Levien van Zon <levien@zonnetjes.net>'
|
||||
|
||||
'''
|
||||
Fetch Edge.org conversations
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class EdgeConversationRSS(BasicNewsRecipe):
|
||||
title = u'Edge.org Conversations'
|
||||
__author__ = 'levien'
|
||||
language = 'en'
|
||||
description = '''Edge.org offers "open-minded, free ranging, intellectually
|
||||
playful ... an unadorned pleasure in curiosity, a collective expression of
|
||||
wonder at the living and inanimate world ... an ongoing and thrilling
|
||||
colloquium.'''
|
||||
oldest_article = 60
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'HomeLeftPannel IMGCTRL'}) ]
|
||||
remove_tags = [
|
||||
dict(name='div',attrs={'class':'Logo'})
|
||||
]
|
||||
|
||||
feeds = [(u'Edge RSS', u'http://edge.org/feeds/')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('conversation/', 'conversation.php?cid=')
|
||||
|
||||
def parse_feeds(self):
|
||||
|
||||
# Call parent's method.
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
|
||||
# Loop through all feeds.
|
||||
for feed in feeds:
|
||||
|
||||
# Loop through all articles in feed.
|
||||
for article in feed.articles[:]:
|
||||
|
||||
# Remove anything that is not a conversation, and remove PDF files as well...
|
||||
|
||||
if not ('CONVERSATION' in article.title):
|
||||
feed.articles.remove(article)
|
||||
elif 'pdf' in article.url:
|
||||
feed.articles.remove(article)
|
||||
|
||||
return feeds
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
@ -6,45 +7,72 @@ __license__ = 'GPL v3'
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
# un-comment the following three lines for the Edmonton Journal
|
||||
# un-comment the following four lines for the Victoria Times Colonist
|
||||
## title = u'Victoria Times Colonist'
|
||||
## url_prefix = 'http://www.timescolonist.com'
|
||||
## description = u'News from Victoria, BC'
|
||||
## fp_tag = 'CAN_TC'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Province
|
||||
## title = u'Vancouver Province'
|
||||
## url_prefix = 'http://www.theprovince.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VP'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Sun
|
||||
## title = u'Vancouver Sun'
|
||||
## url_prefix = 'http://www.vancouversun.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following four lines for the Edmonton Journal
|
||||
title = u'Edmonton Journal'
|
||||
url_prefix = 'http://www.edmontonjournal.com'
|
||||
description = u'News from Edmonton, AB'
|
||||
fp_tag = 'CAN_EJ'
|
||||
|
||||
# un-comment the following three lines for the Calgary Herald
|
||||
#title = u'Calgary Herald'
|
||||
#url_prefix = 'http://www.calgaryherald.com'
|
||||
#description = u'News from Calgary, AB'
|
||||
# un-comment the following four lines for the Calgary Herald
|
||||
## title = u'Calgary Herald'
|
||||
## url_prefix = 'http://www.calgaryherald.com'
|
||||
## description = u'News from Calgary, AB'
|
||||
## fp_tag = 'CAN_CH'
|
||||
|
||||
# un-comment the following three lines for the Regina Leader-Post
|
||||
#title = u'Regina Leader-Post'
|
||||
#url_prefix = 'http://www.leaderpost.com'
|
||||
#description = u'News from Regina, SK'
|
||||
# un-comment the following four lines for the Regina Leader-Post
|
||||
## title = u'Regina Leader-Post'
|
||||
## url_prefix = 'http://www.leaderpost.com'
|
||||
## description = u'News from Regina, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Saskatoon Star-Phoenix
|
||||
#title = u'Saskatoon Star-Phoenix'
|
||||
#url_prefix = 'http://www.thestarphoenix.com'
|
||||
#description = u'News from Saskatoon, SK'
|
||||
# un-comment the following four lines for the Saskatoon Star-Phoenix
|
||||
## title = u'Saskatoon Star-Phoenix'
|
||||
## url_prefix = 'http://www.thestarphoenix.com'
|
||||
## description = u'News from Saskatoon, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Windsor Star
|
||||
#title = u'Windsor Star'
|
||||
#url_prefix = 'http://www.windsorstar.com'
|
||||
#description = u'News from Windsor, ON'
|
||||
# un-comment the following four lines for the Windsor Star
|
||||
## title = u'Windsor Star'
|
||||
## url_prefix = 'http://www.windsorstar.com'
|
||||
## description = u'News from Windsor, ON'
|
||||
## fp_tag = 'CAN_'
|
||||
|
||||
# un-comment the following three lines for the Ottawa Citizen
|
||||
#title = u'Ottawa Citizen'
|
||||
#url_prefix = 'http://www.ottawacitizen.com'
|
||||
#description = u'News from Ottawa, ON'
|
||||
# un-comment the following four lines for the Ottawa Citizen
|
||||
## title = u'Ottawa Citizen'
|
||||
## url_prefix = 'http://www.ottawacitizen.com'
|
||||
## description = u'News from Ottawa, ON'
|
||||
## fp_tag = 'CAN_OC'
|
||||
|
||||
# un-comment the following three lines for the Montreal Gazette
|
||||
#title = u'Montreal Gazette'
|
||||
#url_prefix = 'http://www.montrealgazette.com'
|
||||
#description = u'News from Montreal, QC'
|
||||
# un-comment the following four lines for the Montreal Gazette
|
||||
## title = u'Montreal Gazette'
|
||||
## url_prefix = 'http://www.montrealgazette.com'
|
||||
## description = u'News from Montreal, QC'
|
||||
## fp_tag = 'CAN_MG'
|
||||
|
||||
|
||||
language = 'en_CA'
|
||||
@ -68,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
||||
|
||||
def preprocess_html(self,soup):
|
||||
#delete iempty id attributes--they screw up the TOC for unknow reasons
|
||||
divtags = soup.findAll('div',attrs={'id':''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del(div['id'])
|
||||
def get_cover_url(self):
|
||||
from datetime import timedelta, date
|
||||
if self.fp_tag=='':
|
||||
return None
|
||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
daysback=1
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
while daysback<7:
|
||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
daysback = daysback+1
|
||||
continue
|
||||
break
|
||||
if daysback==7:
|
||||
self.log("\nCover unavailable")
|
||||
cover = None
|
||||
return cover
|
||||
|
||||
def fixChars(self,string):
|
||||
# Replace lsquo (\x91)
|
||||
fixed = re.sub("\x91","‘",string)
|
||||
# Replace rsquo (\x92)
|
||||
fixed = re.sub("\x92","’",fixed)
|
||||
# Replace ldquo (\x93)
|
||||
fixed = re.sub("\x93","“",fixed)
|
||||
# Replace rdquo (\x94)
|
||||
fixed = re.sub("\x94","”",fixed)
|
||||
# Replace ndash (\x96)
|
||||
fixed = re.sub("\x96","–",fixed)
|
||||
# Replace mdash (\x97)
|
||||
fixed = re.sub("\x97","—",fixed)
|
||||
fixed = re.sub("’","’",fixed)
|
||||
return fixed
|
||||
|
||||
def massageNCXText(self, description):
|
||||
# Kindle TOC descriptions won't render certain characters
|
||||
if description:
|
||||
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
||||
# Replace '&' with '&'
|
||||
massaged = re.sub("&","&", massaged)
|
||||
return self.fixChars(massaged)
|
||||
else:
|
||||
return description
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
if first:
|
||||
picdiv = soup.find('body').find('img')
|
||||
if picdiv is not None:
|
||||
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
|
||||
xtitle = article.text_summary.strip()
|
||||
if len(xtitle) == 0:
|
||||
desc = soup.find('meta',attrs={'property':'og:description'})
|
||||
if desc is not None:
|
||||
article.summary = article.text_summary = desc['content']
|
||||
|
||||
def strip_anchors(self,soup):
|
||||
paras = soup.findAll(True)
|
||||
for para in paras:
|
||||
aTags = para.findAll('a')
|
||||
for a in aTags:
|
||||
if a.img is None:
|
||||
a.replaceWith(a.renderContents().decode('cp1252','replace'))
|
||||
return soup
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.strip_anchors(soup)
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
|
||||
|
58
recipes/elet_es_irodalom.recipe
Normal file
@ -0,0 +1,58 @@
|
||||
################################################################################
|
||||
#Description: http://es.hu/ RSS channel
|
||||
#Author: Bigpapa (bigpapabig@hotmail.com)
|
||||
#Date: 2012.01.20. - V1.2
|
||||
################################################################################
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class elet_es_irodalom(BasicNewsRecipe):
|
||||
title = u'\u00c9let \u00e9s Irodalom'
|
||||
__author__ = 'Bigpapa'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 30 # Az adott e-bookban tarolt cikkek feedenkenti maximalis szamat adja meg.
|
||||
no_stylesheets = True
|
||||
#delay = 1
|
||||
use_embedded_content = False
|
||||
encoding = 'iso-8859-2'
|
||||
category = 'Cikkek'
|
||||
language = 'hu'
|
||||
publication_type = 'newsportal'
|
||||
extra_css = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '
|
||||
needs_subscription = 'optional'
|
||||
|
||||
masthead_url = 'http://www.es.hu/images/logo.jpg'
|
||||
timefmt = ' [%Y %b %d, %a]'
|
||||
|
||||
#Nem ide a kódba kell beleírni a hozzáférés adatait, hanem azt akkor adod meg, ha le akarod tölteni!
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://www.es.hu/')
|
||||
br.select_form(name='userfrmlogin')
|
||||
br['cusername'] = self.username
|
||||
br['cpassword'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['doc_author', 'doc_title', 'doc']})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='a', attrs={'target':['_TOP']}),
|
||||
dict(name='div', attrs={'style':['float: right; margin-left: 5px; margin-bottom: 5px;', 'float: right; margin-left: 5px; margin-bottom: 5px;']}),
|
||||
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
|
||||
(u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
|
||||
(u'Visszhang', 'http://www.feed43.com/3727375706873086.xml'),
|
||||
(u'P\xe1ratlan oldal', 'http://www.feed43.com/2525784782475057.xml'),
|
||||
(u'Feuilleton', 'http://www.feed43.com/7216025082703073.xml'),
|
||||
(u'Pr\xf3za', 'http://www.feed43.com/8760248802326384.xml'),
|
||||
(u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
|
||||
(u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
|
||||
(u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml')
|
||||
]
|
16
recipes/emuzica_pl.recipe
Normal file
@ -0,0 +1,16 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class eMuzyka(BasicNewsRecipe):
|
||||
title = u'eMuzyka'
|
||||
__author__ = 'fenuks'
|
||||
description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
|
||||
category = 'music'
|
||||
language = 'pl'
|
||||
cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
|
||||
no_stylesheets = True
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
|
||||
remove_tags=[dict(name='span', attrs={'id':'date'})]
|
||||
feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]
|
@ -20,7 +20,7 @@ class ESPN(BasicNewsRecipe):
|
||||
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
needs_subscription = True
|
||||
needs_subscription = 'optional'
|
||||
encoding= 'ISO-8859-1'
|
||||
|
||||
remove_tags_before = dict(name='font', attrs={'class':'date'})
|
||||
@ -75,10 +75,9 @@ class ESPN(BasicNewsRecipe):
|
||||
|
||||
return soup
|
||||
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username and self.password:
|
||||
br.set_handle_refresh(False)
|
||||
url = ('https://r.espn.go.com/members/v3_1/login')
|
||||
raw = br.open(url).read()
|
||||
@ -100,7 +99,6 @@ class ESPN(BasicNewsRecipe):
|
||||
return article.get('guid', None)
|
||||
|
||||
def print_version(self, url):
|
||||
|
||||
if 'eticket' in url:
|
||||
return url.partition('&')[0].replace('story?', 'print?')
|
||||
match = re.search(r'story\?(id=\d+)', url)
|
||||
|
37
recipes/fhm_uk.recipe
Normal file
@ -0,0 +1,37 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||
title = u'FHM UK'
|
||||
description = 'Good News for Men'
|
||||
cover_url = 'http://www.greatmagazines.co.uk/covers/large/w197/current/fhm.jpg'
|
||||
# cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
|
||||
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
|
||||
__author__ = 'Dave Asbury'
|
||||
# last updated 17/3/12
|
||||
language = 'en_GB'
|
||||
oldest_article = 28
|
||||
max_articles_per_feed = 12
|
||||
remove_empty_feeds = True
|
||||
no_stylesheets = True
|
||||
#auto_cleanup = True
|
||||
#articles_are_obfuscated = True
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),
|
||||
dict(name='img',attrs={'id' : 'ctl00_Body_imgMainImage'}),
|
||||
dict(name='div',attrs={'id' : ['articleLeft']}),
|
||||
dict(name='div',attrs={'class' : ['imagesCenterArticle','containerCenterArticle','articleBody']}),
|
||||
|
||||
]
|
||||
|
||||
#remove_tags = [
|
||||
#dict(attrs={'class' : ['player']}),
|
||||
|
||||
#]
|
||||
feeds = [
|
||||
(u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
|
||||
(u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
|
||||
(u'Upgrade',u'http://feed43.com/0877305847443234.xml'),
|
||||
#(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
|
||||
#(u'Gaming',u'http://feed43.com/0755006465351035.xml'),
|
||||
(u'Gaming',u'http://feed43.com/6537162612465672.xml'),
|
||||
]
|
@ -10,7 +10,8 @@ class Filmweb_pl(BasicNewsRecipe):
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
extra_css = '.hdrBig {font-size:22px;}'
|
||||
remove_empty_feeds=True
|
||||
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
||||
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
|
||||
keep_only_tags= [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]
|
||||
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
|
||||
|
18
recipes/fisco_oggi.recipe
Normal file
@ -0,0 +1,18 @@
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'faber1971'
|
||||
description = 'Website of Italian Governament Income Agency (about revenue, taxation, taxes)- v1.00 (17, December 2011)'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1324112023(BasicNewsRecipe):
|
||||
title = u'Fisco Oggi'
|
||||
language = 'it'
|
||||
__author__ = 'faber1971'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
feeds = [(u'Attualit\xe0', u'http://www.fiscooggi.it/taxonomy/term/1/feed'), (u'Normativa', u'http://www.fiscooggi.it/taxonomy/term/5/feed'), (u'Giurisprudenza', u'http://www.fiscooggi.it/taxonomy/term/8/feed'), (u'Dati e statistiche', u'http://www.fiscooggi.it/taxonomy/term/12/feed'), (u'Analisi e commenti', u'http://www.fiscooggi.it/taxonomy/term/13/feed'), (u'Bilancio e contabilit\xe0', u'http://www.fiscooggi.it/taxonomy/term/576/feed'), (u'Dalle regioni', u'http://www.fiscooggi.it/taxonomy/term/16/feed'), (u'Dal mondo', u'http://www.fiscooggi.it/taxonomy/term/17/feed')]
|
||||
|
@ -1,57 +1,68 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Focus_pl(BasicNewsRecipe):
|
||||
title = u'Focus.pl'
|
||||
oldest_article = 15
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'fenuks'
|
||||
class FocusRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = u'intromatyk <intromatyk@gmail.com>'
|
||||
language = 'pl'
|
||||
description ='polish scientific monthly magazine'
|
||||
version = 1
|
||||
|
||||
title = u'Focus'
|
||||
publisher = u'Gruner + Jahr Polska'
|
||||
category = u'News'
|
||||
description = u'Newspaper'
|
||||
category='magazine'
|
||||
cover_url=''
|
||||
remove_empty_feeds= True
|
||||
no_stylesheets=True
|
||||
remove_tags_before=dict(name='div', attrs={'class':'h2 h2f'})
|
||||
remove_tags_after=dict(name='div', attrs={'class':'clear'})
|
||||
feeds = [(u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
|
||||
(u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
|
||||
(u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
|
||||
(u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
|
||||
(u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
|
||||
(u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
|
||||
(u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
|
||||
(u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
|
||||
(u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100000
|
||||
recursions = 0
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
encoding = 'utf-8'
|
||||
# Seems to work best, but YMMV
|
||||
simultaneous_downloads = 5
|
||||
|
||||
r = re.compile('.*(?P<url>http:\/\/(www.focus.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
|
||||
keep_only_tags =[]
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'cll'}))
|
||||
|
||||
remove_tags =[]
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulm noprint'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'txb'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'h2'}))
|
||||
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'txu'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulc'}))
|
||||
|
||||
extra_css = '''
|
||||
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
|
||||
h1{text-align: left;}
|
||||
h2{font-size: medium; font-weight: bold;}
|
||||
p.lead {font-weight: bold; text-align: left;}
|
||||
.authordate {font-size: small; color: #696969;}
|
||||
.fot{font-size: x-small; color: #666666;}
|
||||
'''
|
||||
|
||||
|
||||
feeds = [
|
||||
('Nauka', 'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
|
||||
('Historia', 'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
|
||||
('Cywilizacja', 'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
|
||||
('Sport', 'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
|
||||
('Technika', 'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
|
||||
('Przyroda', 'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
|
||||
('Technologie', 'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
|
||||
]
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
tag=soup.find(name='a')
|
||||
if tag:
|
||||
new_soup=self.index_to_soup(tag['href']+ 'do-druku/1/', raw=True)
|
||||
return new_soup
|
||||
|
||||
def append_page(self, appendtag):
|
||||
tag=appendtag.find(name='div', attrs={'class':'arrows'})
|
||||
if tag:
|
||||
nexturl='http://www.focus.pl/'+tag.a['href']
|
||||
for rem in appendtag.findAll(name='div', attrs={'class':'klik-nav'}):
|
||||
rem.extract()
|
||||
while nexturl:
|
||||
soup2=self.index_to_soup(nexturl)
|
||||
nexturl=None
|
||||
pagetext=soup2.find(name='div', attrs={'class':'txt'})
|
||||
tag=pagetext.find(name='div', attrs={'class':'arrows'})
|
||||
for r in tag.findAll(name='a'):
|
||||
if u'Następne' in r.string:
|
||||
nexturl='http://www.focus.pl/'+r['href']
|
||||
for rem in pagetext.findAll(name='div', attrs={'class':'klik-nav'}):
|
||||
rem.extract()
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, pagetext)
|
||||
if ('advertisement' in soup.find('title').string.lower()):
|
||||
href = soup.find('a').get('href')
|
||||
return self.index_to_soup(href, raw=True)
|
||||
else:
|
||||
return None
|
||||
|
||||
def get_cover_url(self):
|
||||
soup=self.index_to_soup('http://www.focus.pl/magazyn/')
|
||||
@ -60,7 +71,14 @@ class Focus_pl(BasicNewsRecipe):
|
||||
self.cover_url='http://www.focus.pl/' + tag.a['href']
|
||||
return getattr(self, 'cover_url', self.cover_url)
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup.body)
|
||||
return soup
|
||||
def print_version(self, url):
|
||||
if url.count ('focus.pl.feedsportal.com'):
|
||||
u = url.find('focus0Bpl')
|
||||
u = 'http://www.focus.pl/' + url[u + 11:]
|
||||
u = u.replace('0C', '/')
|
||||
u = u.replace('A', '')
|
||||
u = u.replace ('0E','-')
|
||||
u = u.replace('/nc/1//story01.htm', '/do-druku/1')
|
||||
else:
|
||||
u = url.replace('/nc/1','/do-druku/1')
|
||||
return u
|
@ -3,10 +3,17 @@ import re
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
''' there are three modifications:
|
||||
1) fetch issue cover
|
||||
2) toggle ignore premium articles
|
||||
3) extract proper section names, ie. "Comments", "Essay"
|
||||
|
||||
by Chen Wei weichen302@gmx.com, 2012-02-05'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
language = 'en'
|
||||
version = 1
|
||||
version = 1.01
|
||||
|
||||
title = u'Foreign Affairs (Subcription or (free) Registration)'
|
||||
publisher = u'Council on Foreign Relations'
|
||||
@ -17,6 +24,9 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
|
||||
INDEX = 'http://www.foreignaffairs.com'
|
||||
FRONTPAGE = 'http://www.foreignaffairs.com/magazine'
|
||||
INCLUDE_PREMIUM = False
|
||||
|
||||
|
||||
remove_tags = []
|
||||
remove_tags.append(dict(name = 'base'))
|
||||
@ -37,6 +47,12 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
temp_files = []
|
||||
articles_are_obfuscated = True
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup(self.FRONTPAGE)
|
||||
div = soup.find('div', attrs={'class':'inthemag-issuebuy-cover'})
|
||||
img_url = div.find('img')['src']
|
||||
return self.INDEX + img_url
|
||||
|
||||
def get_obfuscated_article(self, url):
|
||||
br = self.get_browser()
|
||||
br.open(url)
|
||||
@ -50,57 +66,46 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
|
||||
return self.temp_files[-1].name
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup('http://www.foreignaffairs.com/magazine')
|
||||
articles = []
|
||||
answer = []
|
||||
content = soup.find('div', attrs = {'class': 'center-wrapper'})
|
||||
soup = self.index_to_soup(self.FRONTPAGE)
|
||||
sec_start = soup.findAll('div', attrs={'class':'panel-separator'})
|
||||
for sec in sec_start:
|
||||
content = sec.nextSibling
|
||||
if content:
|
||||
for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
|
||||
tag = div.find('div', attrs = {'class': 'views-field-title'})
|
||||
if tag:
|
||||
a = tag.find('a')
|
||||
if a:
|
||||
title = self.tag_to_string(a)
|
||||
url = self.INDEX + a['href']
|
||||
|
||||
author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
|
||||
tag = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
|
||||
# If they ever fix their markup, this will break :-(
|
||||
summary = self.tag_to_string(tag.findNextSibling('p'))
|
||||
description = author + '<br/>' + summary
|
||||
|
||||
articles.append({'title': title, 'date': None, 'url': url, 'description': description})
|
||||
else:
|
||||
continue
|
||||
else:
|
||||
continue
|
||||
|
||||
answer.append(('Magazine', articles))
|
||||
|
||||
ul = content.find('ul')
|
||||
if ul:
|
||||
section = self.tag_to_string(content.find('h2'))
|
||||
articles = []
|
||||
for li in ul.findAll('li'):
|
||||
tag = li.find('div', attrs = {'class': 'views-field-title'})
|
||||
|
||||
tags = []
|
||||
for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
|
||||
tags.append(div)
|
||||
for li in content.findAll('li'):
|
||||
tags.append(li)
|
||||
|
||||
for div in tags:
|
||||
title = url = description = author = None
|
||||
|
||||
if self.INCLUDE_PREMIUM:
|
||||
found_premium = False
|
||||
else:
|
||||
found_premium = div.findAll('span', attrs={'class':
|
||||
'premium-icon'})
|
||||
if not found_premium:
|
||||
tag = div.find('div', attrs={'class': 'views-field-title'})
|
||||
|
||||
if tag:
|
||||
a = tag.find('a')
|
||||
if a:
|
||||
title = self.tag_to_string(a)
|
||||
url = self.INDEX + a['href']
|
||||
description = ''
|
||||
tag = li.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
|
||||
if tag:
|
||||
description = self.tag_to_string(tag)
|
||||
|
||||
articles.append({'title': title, 'date': None, 'url': url, 'description': description})
|
||||
else:
|
||||
continue
|
||||
else:
|
||||
continue
|
||||
|
||||
answer.append(('Letters to the Editor', articles))
|
||||
|
||||
author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
|
||||
tag_summary = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
|
||||
description = self.tag_to_string(tag_summary)
|
||||
articles.append({'title':title, 'date':None, 'url':url,
|
||||
'description':description, 'author':author})
|
||||
if articles:
|
||||
answer.append((section, articles))
|
||||
return answer
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
21
recipes/gameplay_pl.recipe
Normal file
@ -0,0 +1,21 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Gameplay_pl(BasicNewsRecipe):
|
||||
title = u'Gameplay.pl'
|
||||
oldest_article = 7
|
||||
__author__ = 'fenuks'
|
||||
description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
|
||||
category = 'games, movies, books, music'
|
||||
language = 'pl'
|
||||
masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
|
||||
cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
|
||||
remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im']})]
|
||||
feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
|
||||
|
||||
def image_url_processor(self, baseurl, url):
|
||||
if 'http' not in url:
|
||||
return 'http://gameplay.pl'+ url[2:]
|
||||
else:
|
||||
return url
|
@ -4,10 +4,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Gazeta_Wyborcza(BasicNewsRecipe):
|
||||
title = u'Gazeta Wyborcza'
|
||||
__author__ = 'fenuks'
|
||||
cover_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
|
||||
language = 'pl'
|
||||
description ='news from gazeta.pl'
|
||||
category='newspaper'
|
||||
publication_type = 'newspaper'
|
||||
masthead_url='http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
|
||||
INDEX='http://wyborcza.pl'
|
||||
remove_empty_feeds= True
|
||||
oldest_article = 3
|
||||
@ -81,3 +82,10 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
|
||||
return url
|
||||
else:
|
||||
return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html')
|
||||
cover=soup.find(id='GWmini2')
|
||||
soup = self.index_to_soup('http://wyborcza.pl/'+ cover.contents[3].a['href'])
|
||||
self.cover_url='http://wyborcza.pl' + soup.img['src']
|
||||
return getattr(self, 'cover_url', self.cover_url)
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class GlasgowHerald(BasicNewsRecipe):
|
||||
@ -9,12 +8,16 @@ class GlasgowHerald(BasicNewsRecipe):
|
||||
language = 'en_GB'
|
||||
|
||||
__author__ = 'Kovid Goyal'
|
||||
use_embedded_content = False
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':'article'})]
|
||||
remove_tags = [
|
||||
dict(id=['pic-nav']),
|
||||
dict(attrs={'class':['comments-top']})
|
||||
]
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
|
||||
#keep_only_tags = [dict(attrs={'class':'article'})]
|
||||
#remove_tags = [
|
||||
#dict(id=['pic-nav']),
|
||||
#dict(attrs={'class':['comments-top']})
|
||||
#]
|
||||
|
||||
|
||||
feeds = [
|
||||
@ -26,4 +29,3 @@ class GlasgowHerald(BasicNewsRecipe):
|
||||
u'http://www.heraldscotland.com/cmlink/1.768',),
|
||||
(u'Columnists', u'http://www.heraldscotland.com/cmlink/1.658574')]
|
||||
|
||||
|
||||
|
@ -51,6 +51,13 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
|
||||
{'class':['articleTools', 'pagination', 'Ads', 'topad',
|
||||
'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||
picdiv = soup.find('img')
|
||||
if picdiv is not None:
|
||||
self.add_toc_thumbnail(article,picdiv['src'])
|
||||
|
||||
|
||||
#Use the mobile version rather than the web version
|
||||
def print_version(self, url):
|
||||
return url.rpartition('?')[0] + '?service=mobile'
|
||||
|
13
recipes/goal.recipe
Normal file
@ -0,0 +1,13 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1325677767(BasicNewsRecipe):
|
||||
title = u'Goal'
|
||||
oldest_article = 1
|
||||
language = 'it'
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
remove_tags_after = [dict(id='article_content')]
|
||||
feeds = [(u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss')]
|
||||
__author__ = 'faber1971'
|
||||
description = 'Sports news from Italy'
|
||||
|
76
recipes/grantland.recipe
Normal file
@ -0,0 +1,76 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class GrantLand(BasicNewsRecipe):
|
||||
title = u"Grantland"
|
||||
description = 'Writings on Sports & Pop Culture'
|
||||
language = 'en'
|
||||
__author__ = 'barty on mobileread.com forum'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
# auto_cleanup is too aggressive sometimes and we end up with blank articles
|
||||
auto_cleanup = False
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
oldest_article = 90
|
||||
|
||||
cover_url = 'http://cdn0.sbnation.com/imported_assets/740965/blog_grantland_grid_3.jpg'
|
||||
masthead_url = 'http://a1.espncdn.com/prod/assets/grantland/grantland-logo.jpg'
|
||||
|
||||
INDEX = 'http://www.grantland.com'
|
||||
CATEGORIES = [
|
||||
# comment out second line if you don't want older articles
|
||||
# (user friendly name, url suffix, max number of articles to load)
|
||||
('Today in Grantland','',20),
|
||||
('In Case You Missed It','incaseyoumissedit',35),
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
{'name':['style','aside','nav','footer','script']},
|
||||
{'name':'h1','text':'Grantland'},
|
||||
{'id':['header','col-right']},
|
||||
{'class':['connect_widget']},
|
||||
{'name':'section','class':re.compile(r'\b(ad|module)\b')},
|
||||
]
|
||||
|
||||
preprocess_regexps = [
|
||||
# remove blog banners
|
||||
(re.compile(r'<a href="/blog/(?:(?!</a>).)+</a>', re.DOTALL|re.IGNORECASE), lambda m: ''),
|
||||
]
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
seen_urls = set([])
|
||||
|
||||
for category in self.CATEGORIES:
|
||||
|
||||
(cat_name, tag, max_articles) = category
|
||||
self.log('Reading category:', cat_name)
|
||||
articles = []
|
||||
|
||||
page = "%s/%s" % (self.INDEX, tag)
|
||||
soup = self.index_to_soup(page)
|
||||
|
||||
main = soup.find('div',id='col-main')
|
||||
if main is None:
|
||||
main = soup
|
||||
|
||||
for tag in main.findAll('a', href=re.compile(r'(story|post)/_/id/\d+')):
|
||||
url = tag['href']
|
||||
if url in seen_urls:
|
||||
continue
|
||||
title = tag.string
|
||||
# blank title probably means <a href=".."><img /></a>. skip
|
||||
if not title:
|
||||
continue
|
||||
self.log('\tFound article:', title)
|
||||
self.log('\t', url)
|
||||
articles.append({'title':title,'url':url})
|
||||
seen_urls.add(url)
|
||||
|
||||
if len(articles) >= max_articles:
|
||||
break
|
||||
|
||||
if articles:
|
||||
feeds.append((cat_name, articles))
|
||||
|
||||
return feeds
|
@ -8,29 +8,31 @@ class Gry_online_pl(BasicNewsRecipe):
|
||||
language = 'pl'
|
||||
oldest_article = 13
|
||||
INDEX= 'http://www.gry-online.pl/'
|
||||
cover_url='http://www.gry-online.pl/img/1st_10/1st-gol-logo.png'
|
||||
masthead_url='http://www.gry-online.pl/im/gry-online-logo.png'
|
||||
cover_url='http://www.gry-online.pl/im/gry-online-logo.png'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
extra_css = 'p.wn1{font-size:22px;}'
|
||||
remove_tags_after= [dict(name='div', attrs={'class':['tresc-newsa']})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['txthead']}), dict(name='p', attrs={'class':['wtx1', 'wn1', 'wob']}), dict(name='a', attrs={'class':['num_str_nex']})]
|
||||
#remove_tags= [dict(name='div', attrs={'class':['news_plat']})]
|
||||
keep_only_tags=[dict(name='div', attrs={'class':'gc660'})]
|
||||
remove_tags=[dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})]
|
||||
feeds = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')]
|
||||
|
||||
|
||||
def append_page(self, soup, appendtag):
|
||||
nexturl = soup.find('a', attrs={'class':'num_str_nex'})
|
||||
if appendtag.find('a', attrs={'class':'num_str_nex'}) is not None:
|
||||
appendtag.find('a', attrs={'class':'num_str_nex'}).replaceWith('\n')
|
||||
if nexturl is not None:
|
||||
if 'strona' in nexturl.div.string:
|
||||
nexturl= self.INDEX + nexturl['href']
|
||||
soup2 = self.index_to_soup(nexturl)
|
||||
pagetext = soup2.findAll(name='p', attrs={'class':['wtx1', 'wn1', 'wob']})
|
||||
for tag in pagetext:
|
||||
tag = appendtag.find('div', attrs={'class':'n5p'})
|
||||
if tag:
|
||||
nexturls=tag.findAll('a')
|
||||
for nexturl in nexturls[1:]:
|
||||
try:
|
||||
soup2 = self.index_to_soup('http://www.gry-online.pl/S020.asp'+ nexturl['href'])
|
||||
except:
|
||||
soup2 = self.index_to_soup('http://www.gry-online.pl/S022.asp'+ nexturl['href'])
|
||||
pagetext = soup2.find(attrs={'class':'gc660'})
|
||||
for r in pagetext.findAll(name='header'):
|
||||
r.extract()
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, tag)
|
||||
self.append_page(soup2, appendtag)
|
||||
appendtag.insert(pos, pagetext)
|
||||
for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button']}):
|
||||
r.extract()
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -79,6 +79,12 @@ class Guardian(BasicNewsRecipe):
|
||||
url = None
|
||||
return url
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||
picdiv = soup.find('img')
|
||||
if picdiv is not None:
|
||||
self.add_toc_thumbnail(article,picdiv['src'])
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
||||
# multiple html sections in soup, useful stuff in the first
|
||||
|
@ -9,9 +9,9 @@ from calibre.ptempfile import PersistentTemporaryFile
|
||||
from urlparse import urlparse
|
||||
import re
|
||||
|
||||
class HackerNews(BasicNewsRecipe):
|
||||
title = 'Hacker News'
|
||||
__author__ = 'Tom Scholl'
|
||||
class HNWithCommentsLink(BasicNewsRecipe):
|
||||
title = 'HN With Comments Link'
|
||||
__author__ = 'Tom Scholl & David Kerschner'
|
||||
description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
|
||||
publisher = 'Y Combinator'
|
||||
category = 'news, programming, it, technology'
|
||||
@ -80,6 +80,11 @@ class HackerNews(BasicNewsRecipe):
|
||||
body = body + comments
|
||||
return u'<html><title>' + title + u'</title><body>' + body + '</body></html>'
|
||||
|
||||
def parse_feeds(self):
|
||||
a = super(HNWithCommentsLink, self).parse_feeds()
|
||||
self.hn_articles = a[0].articles
|
||||
return a
|
||||
|
||||
def get_obfuscated_article(self, url):
|
||||
if url.startswith('http://news.ycombinator.com'):
|
||||
content = self.get_hn_content(url)
|
||||
@ -97,6 +102,13 @@ class HackerNews(BasicNewsRecipe):
|
||||
else:
|
||||
content = self.get_readable_content(url)
|
||||
|
||||
article = 0
|
||||
for a in self.hn_articles:
|
||||
if a.url == url:
|
||||
article = a
|
||||
|
||||
content = re.sub(r'</body>\s*</html>\s*$', '', content) + article.summary + '</body></html>'
|
||||
|
||||
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||
self.temp_files[-1].write(content)
|
||||
self.temp_files[-1].close()
|
||||
|
11
recipes/haksoz.recipe
Normal file
@ -0,0 +1,11 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1324739199(BasicNewsRecipe):
|
||||
title = u'Haks\xf6z'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20
|
||||
auto_cleanup = True
|
||||
language = 'tr'
|
||||
__author__ = 'asalet_r'
|
||||
|
||||
feeds = [(u'Haks\xf6z', u'http://www.haksozhaber.net/rss/')]
|
58
recipes/hamilton_spectator.recipe
Normal file
@ -0,0 +1,58 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Hamilton Spectator Calibre Recipe
|
||||
'''
|
||||
class HamiltonSpectator(BasicNewsRecipe):
|
||||
title = u'Hamilton Spectator'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
__author__ = u'Eric Coolman'
|
||||
publisher = u'thespec.com'
|
||||
description = u'Ontario Canada Newspaper'
|
||||
category = u'News, Ontario, Canada'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'en_CA'
|
||||
encoding = 'utf-8'
|
||||
|
||||
feeds = [
|
||||
(u'Top Stories',u'http://www.thespec.com/rss?query=/&assetType=Article'),
|
||||
(u'All News',u'http://www.thespec.com/rss?query=/news&assetType=Article'),
|
||||
(u'Local',u'http://www.thespec.com/rss?query=/local&assetType=Article'),
|
||||
(u'Ontario',u'http://www.thespec.com/rss?query=/ontario&assetType=Article'),
|
||||
(u'Canada',u'http://www.thespec.com/rss?query=/canada&assetType=Article'),
|
||||
(u'World News',u'http://www.thespec.com/rss?query=/world&assetType=Article'),
|
||||
(u'Business',u'http://www.thespec.com/rss?query=/business&assetType=Article'),
|
||||
(u'Crime',u'http://www.thespec.com/rss?query=/crime&assetType=Article'),
|
||||
(u'All Sports',u'http://www.thespec.com/rss?query=/sports&assetType=Article'),
|
||||
(u'Ticats',u'http://www.thespec.com/rss?query=/sports/ticats&assetType=Article'),
|
||||
(u'Bulldogs',u'http://www.thespec.com/rss?query=/sports/bulldogs&assetType=Article'),
|
||||
(u'High School Sports',u'http://www.thespec.com/rss?query=/sports/highschools&assetType=Article'),
|
||||
(u'Local Sports',u'http://www.thespec.com/rss?query=/sports/local&assetType=Article'),
|
||||
(u'What''s On',u'http://www.thespec.com/rss?query=/whatson&assetType=Article'),
|
||||
(u'Arts and Entertainment',u'http://www.thespec.com/rss?query=/whatson/artsentertainment&assetType=Article'),
|
||||
(u'Books',u'http://www.thespec.com/rss?query=/whatson/books&assetType=Article'),
|
||||
(u'Movies',u'http://www.thespec.com/rss?query=/whatson/movies&assetType=Article'),
|
||||
(u'Music',u'http://www.thespec.com/rss?query=/whatson/music&assetType=Article'),
|
||||
(u'Restaurant Reviews',u'http://www.thespec.com/rss?query=/whatson/restaurants&assetType=Article'),
|
||||
(u'Opinion',u'http://www.thespec.com/rss?query=/opinion&assetType=Article'),
|
||||
(u'Opinion Columns',u'http://www.thespec.com/rss?query=/opinion/columns&assetType=Article'),
|
||||
(u'Cartoons',u'http://www.thespec.com/rss?query=/opinion/cartoons&assetType=Article'),
|
||||
(u'Letters',u'http://www.thespec.com/rss?query=/opinion/letters&assetType=Article'),
|
||||
(u'Editorial',u'http://www.thespec.com/rss?query=/opinion/editorial&assetType=Article'),
|
||||
(u'Community',u'http://www.thespec.com/rss?query=/community&assetType=Article'),
|
||||
(u'Education',u'http://www.thespec.com/rss?query=/community/education&assetType=Article'),
|
||||
(u'Faith',u'http://www.thespec.com/rss?query=/community/faith&assetType=Article'),
|
||||
(u'Contests',u'http://www.thespec.com/rss?query=/community/contests&assetType=Article'),
|
||||
(u'Living',u'http://www.thespec.com/rss?query=/living&assetType=Article'),
|
||||
(u'Food',u'http://www.thespec.com/rss?query=/living/food&assetType=Article'),
|
||||
(u'Health and Fitness',u'http://www.thespec.com/rss?query=/living/healthfitness&assetType=Article'),
|
||||
(u'Your Home',u'http://www.thespec.com/rss?query=/living/home&assetType=Article'),
|
||||
(u'Travel',u'http://www.thespec.com/rss?query=/living/travel&assetType=Article'),
|
||||
(u'Family and Parenting',u'http://www.thespec.com/rss?query=/living/familyparenting&assetType=Article'),
|
||||
(u'Style',u'http://www.thespec.com/rss?query=/living/style&assetType=Article')
|
||||
]
|
||||
|
43
recipes/high_country_news.recipe
Normal file
@ -0,0 +1,43 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
|
||||
|
||||
'''
|
||||
Fetch High Country News
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class HighCountryNews(BasicNewsRecipe):
    '''
    Recipe that builds an e-book from the High Country News FeedBurner feeds,
    using the print view of each article.
    '''

    title = u'High Country News'
    description = u'News from the American West'
    __author__ = 'Armin Geller'  # 2012-01-31
    publisher = 'High Country News'
    timefmt = ' [%a, %d %b %Y]'
    language = 'en-Us'
    encoding = 'UTF-8'
    publication_type = 'newspaper'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    auto_cleanup = True
    remove_javascript = True
    use_embedded_content = False
    masthead_url = 'http://www.hcn.org/logo.jpg'  # 2012-01-31 AGe add
    # Page that get_cover_url() scrapes for the cover image.
    cover_source = 'http://www.hcn.org'  # 2012-01-31 AGe add

    def get_cover_url(self):  # 2012-01-31 AGe add
        '''
        Return the ``src`` of the first image inside the first ``div`` of the
        element on ``cover_source`` that carries the home page class string.

        Returns None when that element, its ``div``, its ``img`` or the
        ``src`` attribute is missing, instead of raising.
        '''
        cover_source_soup = self.index_to_soup(self.cover_source)
        preview_image_div = cover_source_soup.find(attrs={'class':' portaltype-Plone Site content--hcn template-homepage_view'})
        # find() gives None when nothing matches, and tag.child gives None
        # when the child is absent; check each step so a change in the
        # site's markup cannot raise AttributeError here.
        if preview_image_div is None or preview_image_div.div is None:
            return None
        cover_img = preview_image_div.div.img
        if cover_img is None:
            return None
        return cover_img.get('src')

    feeds = [
        (u'Most recent', u'http://feeds.feedburner.com/hcn/most-recent'),
        (u'Current Issue', u'http://feeds.feedburner.com/hcn/current-issue'),

        (u'Writers on the Range', u'http://feeds.feedburner.com/hcn/wotr'),
        (u'High Country Views', u'http://feeds.feedburner.com/hcn/HighCountryViews'),
    ]

    def print_version(self, url):
        '''Return the print view URL for the article at ``url``.'''
        return url + '/print_view'
|
||||
|
@ -1,4 +1,5 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import urllib, re
|
||||
|
||||
class HindustanTimes(BasicNewsRecipe):
|
||||
title = u'Hindustan Times'
|
||||
@ -26,4 +27,24 @@ class HindustanTimes(BasicNewsRecipe):
|
||||
'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
    '''
    HT uses a variant of the feedportal RSS ad display mechanism

    First try to extract the article link from the ``bookmark.cfm`` link
    embedded in the feed summary. If that fails for any reason, open the
    feed URL, take the second to last ``/`` separated component of the
    final URL and decode its two character escape codes.

    :param article: feed entry; its ``summary`` attribute is read.
    :return: the decoded article URL.
    '''
    try:
        s = article.summary
        return urllib.unquote(
            re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
    except Exception:
        # Best effort only: a missing summary or a summary without the
        # bookmark link falls through to the redirect based lookup below.
        # Exception (not a bare except) so Ctrl-C/SystemExit still propagate.
        pass
    url = BasicNewsRecipe.get_article_url(self, article)
    res = self.browser.open_novisit(url)
    url = res.geturl().split('/')[-2]
    encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
            '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
            'www.'}
    # Decode in a single left-to-right pass. Replacing one code at a time
    # in dict order is order dependent: '0A' -> '0' can create a fresh
    # '0X' sequence that a later replacement would wrongly decode again.
    escape_pat = re.compile('|'.join(re.escape(k) for k in encoding))
    return escape_pat.sub(lambda m: encoding[m.group(0)], url)
|
||||
|
||||
|
||||
|
@ -1,44 +1,58 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
################################################################################
|
||||
#Description: http://hvg.hu/ RSS channel
|
||||
#Author: Bigpapa (bigpapabig@hotmail.com)
|
||||
#Date: 2011.12.20. - V1.1
|
||||
################################################################################
|
||||
|
||||
class HVG(BasicNewsRecipe):
|
||||
title = 'HVG.HU'
|
||||
__author__ = u'István Papp'
|
||||
description = u'Friss hírek a HVG-től'
|
||||
timefmt = ' [%Y. %b. %d., %a.]'
|
||||
oldest_article = 4
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class hvg(BasicNewsRecipe):
|
||||
title = u'HVG'
|
||||
__author__ = 'Bigpapa'
|
||||
language = 'hu'
|
||||
|
||||
max_articles_per_feed = 100
|
||||
oldest_article = 5 # Hany napos legyen a legregebbi cikk amit leszedjen.
|
||||
max_articles_per_feed = 5 # Az adott e-bookban tarolt cikkek feedenkenti maximalis szamat adja meg.
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
publisher = 'HVG Online'
|
||||
category = u'news, hírek, hvg'
|
||||
extra_css = 'body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
||||
remove_tags_before = dict(id='pg-content')
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
extra_css = ' h2 { font:bold 28px} '
|
||||
|
||||
feeds = [
|
||||
(u'Itthon', u'http://hvg.hu/rss/itthon')
|
||||
,(u'Világ', u'http://hvg.hu/rss/vilag')
|
||||
,(u'Gazdaság', u'http://hvg.hu/rss/gazdasag')
|
||||
,(u'IT | Tudomány', u'http://hvg.hu/rss/tudomany')
|
||||
,(u'Panoráma', u'http://hvg.hu/rss/Panorama')
|
||||
,(u'Karrier', u'http://hvg.hu/rss/karrier')
|
||||
,(u'Gasztronómia', u'http://hvg.hu/rss/gasztronomia')
|
||||
,(u'Helyi érték', u'http://hvg.hu/rss/helyiertek')
|
||||
,(u'Kultúra', u'http://hvg.hu/rss/kultura')
|
||||
,(u'Cégautó', u'http://hvg.hu/rss/cegauto')
|
||||
,(u'Vállalkozó szellem', u'http://hvg.hu/rss/kkv')
|
||||
,(u'Egészség', u'http://hvg.hu/rss/egeszseg')
|
||||
,(u'Vélemény', u'http://hvg.hu/rss/velemeny')
|
||||
,(u'Sport', u'http://hvg.hu/rss/sport')
|
||||
remove_attributes = ['style','font', 'href']
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':['pg-content']})
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace ('#rss', '/print')
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['box articlemenu', 'bannergoogle468', 'boxcontainer left', 'boxcontainer', 'commentbox']}),
|
||||
dict(name='table', attrs={'class':['banner2', 'monocle']}),
|
||||
dict(name='div', attrs={'id':['connect_widget_4cf63ca849ddf4577922632', 'sharetip', 'upprev_box']}),
|
||||
dict(name='div', attrs={'style':['float: right; margin-bottom: 5px;', 'display: none;']}),
|
||||
dict(name='h3', attrs={'class':['hthree']}),
|
||||
dict(name='ul', attrs={'class':['defaultul']}),
|
||||
dict(name='form', attrs={'id':['commentForm']}),
|
||||
dict(name='h6', attrs={'class':['hthree']}),
|
||||
dict(name='h6', attrs={'class':['more2']}),
|
||||
dict(name='img', attrs={'class':['framed']}),
|
||||
dict(name='td', attrs={'class':['greyboxbody','embedvideobody','embedvideofooter','embedvideobottom']}),
|
||||
|
||||
|
||||
|
||||
]
|
||||
|
||||
feeds = [
|
||||
# (u'\xd6sszes', 'http://hvg.hu/rss'),
|
||||
(u'Itthon', 'http://hvg.hu/rss/itthon'),
|
||||
(u'Vil\xe1g', 'http://hvg.hu/rss/vilag'),
|
||||
(u'Gazdas\xe1g', 'http://hvg.hu/rss/gazdasag'),
|
||||
(u'Tudom\xe1ny', 'http://hvg.hu/rss/tudomany'),
|
||||
(u'Panor\xe1ma', 'http://hvg.hu/rss/panorama'),
|
||||
(u'Karrier', 'http://hvg.hu/rss/karrier'),
|
||||
(u'Gasztron\xf3mia', 'http://hvg.hu/rss/gasztronomia'),
|
||||
(u'Helyi \xe9rt\xe9k', 'http://hvg.hu/rss/helyiertek'),
|
||||
(u'Kult\xfara', 'http://hvg.hu/rss/kultura'),
|
||||
(u'C\xe9gaut\xf3', 'http://hvg.hu/rss/cegauto'),
|
||||
(u'V\xe1llalkoz\xf3 szellem', 'http://hvg.hu/rss/kkv'),
|
||||
(u'Eg\xe9szs\xe9g', 'http://hvg.hu/rss/egeszseg'),
|
||||
(u'V\xe9lem\xe9ny', 'http://hvg.hu/rss/velemeny'),
|
||||
(u'Sport', 'http://hvg.hu/rss/sport')
|
||||
]
|
BIN
recipes/icons/asianreviewofbooks.png
Normal file
After Width: | Height: | Size: 906 B |
BIN
recipes/icons/biolog_pl.png
Normal file
After Width: | Height: | Size: 1.2 KiB |
BIN
recipes/icons/blues.png
Normal file
After Width: | Height: | Size: 910 B |
BIN
recipes/icons/ciekawostki_historyczne.png
Normal file
After Width: | Height: | Size: 994 B |
BIN
recipes/icons/computerworld_pl.png
Normal file
After Width: | Height: | Size: 373 B |
BIN
recipes/icons/descopera_org.png
Normal file
After Width: | Height: | Size: 9.3 KiB |
BIN
recipes/icons/dziennik_pl.png
Normal file
After Width: | Height: | Size: 481 B |
BIN
recipes/icons/gameplay_pl.png
Normal file
After Width: | Height: | Size: 991 B |
BIN
recipes/icons/in4_pl.png
Normal file
After Width: | Height: | Size: 357 B |
BIN
recipes/icons/informacje_usa.png
Normal file
After Width: | Height: | Size: 808 B |
BIN
recipes/icons/kosmonauta_pl.png
Normal file
After Width: | Height: | Size: 1.2 KiB |
BIN
recipes/icons/kresy_pl.png
Normal file
After Width: | Height: | Size: 4.0 KiB |
BIN
recipes/icons/mediapart.png
Normal file
After Width: | Height: | Size: 382 B |
Before Width: | Height: | Size: 712 B After Width: | Height: | Size: 712 B |
BIN
recipes/icons/mlody_technik_pl.png
Normal file
After Width: | Height: | Size: 2.1 KiB |
BIN
recipes/icons/moneynews.png
Normal file
After Width: | Height: | Size: 914 B |
BIN
recipes/icons/novilist_novine_hr.png
Normal file
After Width: | Height: | Size: 241 B |
BIN
recipes/icons/novilist_portal_hr.png
Normal file
After Width: | Height: | Size: 944 B |
BIN
recipes/icons/oclab_pl.png
Normal file
After Width: | Height: | Size: 881 B |
BIN
recipes/icons/overclock_pl.png
Normal file
After Width: | Height: | Size: 817 B |
BIN
recipes/icons/palmtop_pl.png
Normal file
After Width: | Height: | Size: 366 B |
BIN
recipes/icons/pc_arena.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/pc_centre_pl.png
Normal file
After Width: | Height: | Size: 2.8 KiB |
BIN
recipes/icons/pc_foster.png
Normal file
After Width: | Height: | Size: 694 B |
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 289 B |
BIN
recipes/icons/polska_times.png
Normal file
After Width: | Height: | Size: 322 B |
BIN
recipes/icons/pure_pc.png
Normal file
After Width: | Height: | Size: 386 B |
BIN
recipes/icons/racjonalista_pl.png
Normal file
After Width: | Height: | Size: 850 B |
BIN
recipes/icons/rionegro.png
Normal file
After Width: | Height: | Size: 817 B |
BIN
recipes/icons/rue89.png
Normal file
After Width: | Height: | Size: 1.2 KiB |
BIN
recipes/icons/samanyolu_haber.png
Normal file
After Width: | Height: | Size: 968 B |