merge with John's branch
12
COPYRIGHT
@ -9,6 +9,12 @@ License: GPL-2 or later
|
|||||||
The full text of the GPL is distributed as in
|
The full text of the GPL is distributed as in
|
||||||
/usr/share/common-licenses/GPL-2 on Debian systems.
|
/usr/share/common-licenses/GPL-2 on Debian systems.
|
||||||
|
|
||||||
|
Files: setup/iso_639/*
|
||||||
|
Copyright: Various
|
||||||
|
License: LGPL 2.1
|
||||||
|
The full text of the LGPL is distributed as in
|
||||||
|
/usr/share/common-licenses/LGPL-2.1 on Debian systems.
|
||||||
|
|
||||||
Files: src/calibre/ebooks/BeautifulSoup.py
|
Files: src/calibre/ebooks/BeautifulSoup.py
|
||||||
Copyright: Copyright (c) 2004-2007, Leonard Richardson
|
Copyright: Copyright (c) 2004-2007, Leonard Richardson
|
||||||
License: BSD
|
License: BSD
|
||||||
@ -28,6 +34,12 @@ License: other
|
|||||||
are permitted in any medium without royalty provided the copyright
|
are permitted in any medium without royalty provided the copyright
|
||||||
notice and this notice are preserved.
|
notice and this notice are preserved.
|
||||||
|
|
||||||
|
Files: src/calibre/ebooks/readability/*
|
||||||
|
Copyright: Unknown
|
||||||
|
License: Apache 2.0
|
||||||
|
The full text of the Apache 2.0 license is available at:
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
Files: /src/cherrypy/*
|
Files: /src/cherrypy/*
|
||||||
Copyright: Copyright (c) 2004-2007, CherryPy Team (team@cherrypy.org)
|
Copyright: Copyright (c) 2004-2007, CherryPy Team (team@cherrypy.org)
|
||||||
Copyright: Copyright (C) 2005, Tiago Cogumbreiro <cogumbreiro@users.sf.net>
|
Copyright: Copyright (C) 2005, Tiago Cogumbreiro <cogumbreiro@users.sf.net>
|
||||||
|
147
Changelog.yaml
@ -19,6 +19,153 @@
|
|||||||
# new recipes:
|
# new recipes:
|
||||||
# - title:
|
# - title:
|
||||||
|
|
||||||
|
- version: 0.8.16
|
||||||
|
date: 2011-08-26
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "News download: Add algorithms to automatically clean up downloaded HTML"
|
||||||
|
description: "Use the algorithms from the Readability project to automatically clean up downloaded HTML. You can turn this on in your own recipes by adding auto_cleanup=True to the recipe. It is turned on by default for basic recipes created via the GUI. This makes it a little easier for beginners to develop recipes."
|
||||||
|
type: major
|
||||||
|
|
||||||
|
- title: "Add an option to Preferences->Look and Feel->Cover Browser to show the cover browser full screen. When showing the cover browser in a separate window, you can make it fullscreen by pressing the F11 key."
|
||||||
|
tickets: [829855 ]
|
||||||
|
|
||||||
|
- title: "Show the languages currently used at the top of the drop down list of languages"
|
||||||
|
|
||||||
|
- title: "When automatically computing author sort from author's name, if the name contains certain words like Inc., Company, Team, etc. use the author name as the sort string directly. The list of such words can be controlled via Preferences->Tweaks."
|
||||||
|
tickets: [797895]
|
||||||
|
|
||||||
|
- title: "Add a search for individual tweaks to Preferences->Tweaks"
|
||||||
|
|
||||||
|
- title: "Drivers for a few new Android phones"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Fix line unwrapping algorithms to account for some central European characters as well."
|
||||||
|
tickets: [822744]
|
||||||
|
|
||||||
|
- title: "Switch to using more modern language names/translations from the iso-codes package"
|
||||||
|
|
||||||
|
- title: "Allow case insensitive entry of language names for convenience."
|
||||||
|
tickets: [832761]
|
||||||
|
|
||||||
|
- title: "When adding a text indent to paragraphs as part of the remove spacing between paragraphs transformation, do not add an indent to paragraphs that are directly centered or right aligned."
|
||||||
|
tickets: [830439]
|
||||||
|
|
||||||
|
- title: "Conversion pipeline: More robust handling of case insensitive tag and class css selectors"
|
||||||
|
|
||||||
|
- title: "MOBI Output: Add support for the start attribute on <ol> tags"
|
||||||
|
|
||||||
|
- title: "When adding books that have no language specified, do not automatically set the language to calibre's interface language."
|
||||||
|
tickets: [830092]
|
||||||
|
|
||||||
|
- title: "Fix use of tag browser to search for languages when calibre is translated to a non English language"
|
||||||
|
tickets: [830078]
|
||||||
|
|
||||||
|
- title: "When downloading news, set the language field correctly"
|
||||||
|
|
||||||
|
- title: "Fix languages field in the Edit metadata dialog too wide"
|
||||||
|
tickets: [829912]
|
||||||
|
|
||||||
|
- title: "Fix setting of languages that have commas in their names broken"
|
||||||
|
|
||||||
|
- title: "FB2 Input: When converting FB2 files, read the cover from the FB2 file correctly."
|
||||||
|
tickets: [829240]
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Politifact
|
||||||
|
- Reuters
|
||||||
|
- Sueddeutsche
|
||||||
|
- CNN
|
||||||
|
- Financial Times UK
|
||||||
|
- MSDN Magazine
|
||||||
|
- Houston Chronicle
|
||||||
|
- Harvard Business Review
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: CBN News and Fairbanks Daily
|
||||||
|
author: Roger
|
||||||
|
|
||||||
|
- title: Hacker News
|
||||||
|
author: Tom Scholl
|
||||||
|
|
||||||
|
- title: Various Turkish news sources
|
||||||
|
author: thomass
|
||||||
|
|
||||||
|
- title: Cvece Zla
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
- title: Various Polish news sources
|
||||||
|
author: fenuks
|
||||||
|
|
||||||
|
- title: Fluter
|
||||||
|
author: Armin Geller
|
||||||
|
|
||||||
|
- title: Brasil de Fato
|
||||||
|
author: Alex Mitrani
|
||||||
|
|
||||||
|
- version: 0.8.15
|
||||||
|
date: 2011-08-19
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Add a 'languages' metadata field."
|
||||||
|
type: major
|
||||||
|
description: "This is useful if you have a multi-lingual book collection. You can now set one or more languages per book via the Edit Metadata dialog. If you want the languages
|
||||||
|
column to be visible, then go to Preferences->Add your own columns and unhide the languages columns. You can also bulk set the languages on multiple books via the bulk edit metadata dialog. You can also have the languages show up in the book details panel on the right by going to Preferences->Look and Feel->Book details"
|
||||||
|
|
||||||
|
- title: "Get Books: Add XinXii store."
|
||||||
|
|
||||||
|
- title: "Metadata download plugin for ozon.ru, enabled only when user selects Russian as their language in the welcome wizard."
|
||||||
|
|
||||||
|
- title: "Bambook driver: Allow direct transfer of PDF files to Bambook devices"
|
||||||
|
|
||||||
|
- title: "Driver for Coby MID7015A and Asus EEE Note"
|
||||||
|
|
||||||
|
- title: "Edit metadata dialog: The keyboard shortcut Ctrl+D can now be used to trigger a metadata download. Also show the row number of the book being edited in the titlebar"
|
||||||
|
|
||||||
|
- title: "Add an option to not preserve the date when using the 'Copy to Library' function (found in Preferences->Adding books)"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Linux binary: Use readlink -f rather than readlink -e in the launcher scripts so that they work with recent releases of busybox"
|
||||||
|
|
||||||
|
- title: "When bulk downloading metadata for more than 100 books at a time, automatically split up the download into batches of 100."
|
||||||
|
tickets: [828373]
|
||||||
|
|
||||||
|
- title: "When deleting books from the Kindle also delete 'sidecar' .apnx and .ph1 files as the kindle does not clean them up automatically"
|
||||||
|
tickets: [827684]
|
||||||
|
|
||||||
|
- title: "Fix a subtle bug in the device drivers that caused calibre to lose track of some books on the device if you used author_sort in the send to device template and your books have author sort values that differ only in case."
|
||||||
|
tickets: [825706]
|
||||||
|
|
||||||
|
- title: "Fix scene break character pattern not saved in conversion preferences"
|
||||||
|
tickets: [826038]
|
||||||
|
|
||||||
|
- title: "Keyboard shortcuts: Fix a bug triggered by some third party plugins that made the keyboard preferences unusable in OS X."
|
||||||
|
tickets: [826325]
|
||||||
|
|
||||||
|
- title: "Search box: Fix completion no longer working after using Tag Browser to do a search. Also ensure that completer popup is always hidden when a search is performed."
|
||||||
|
|
||||||
|
- title: "Fix pressing Enter in the search box causes the same search to be executed twice in the plugins and keyboard shortcuts preferences panels"
|
||||||
|
|
||||||
|
- title: "Catalog generation: Fix error creating epub/mobi catalogs on non UTF-8 windows systems when the metadata contained non ASCII characters"
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Financial Times UK
|
||||||
|
- La Tercera
|
||||||
|
- Folha de Sao Paulo
|
||||||
|
- Metro Nieuws NL
|
||||||
|
- La Nacion
|
||||||
|
- Juventud Rebelde
|
||||||
|
- Rzeczpospolita Online
|
||||||
|
- Newsweek Polska
|
||||||
|
- CNET news
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: El Mostrador and The Clinic
|
||||||
|
author: Alex Mitrani
|
||||||
|
|
||||||
|
- title: Patente de Corso
|
||||||
|
author: Oscar Megia Lopez
|
||||||
|
|
||||||
- version: 0.8.14
|
- version: 0.8.14
|
||||||
date: 2011-08-12
|
date: 2011-08-12
|
||||||
|
|
||||||
|
12
recipes/android_com_pl.recipe
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Android_com_pl(BasicNewsRecipe):
|
||||||
|
title = u'Android.com.pl'
|
||||||
|
__author__ = 'fenuks'
|
||||||
|
description = 'Android.com.pl - biggest polish Android site'
|
||||||
|
category = 'Android, mobile'
|
||||||
|
language = 'pl'
|
||||||
|
cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'
|
||||||
|
oldest_article = 8
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
feeds = [(u'Android', u'http://android.com.pl/component/content/frontpage/frontpage.feed?type=rss')]
|
15
recipes/bash_org_pl.recipe
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class Bash_org_pl(BasicNewsRecipe):
|
||||||
|
title = u'Bash.org.pl'
|
||||||
|
__author__ = 'fenuks'
|
||||||
|
description = 'Bash.org.pl - funny quotations from IRC discussions'
|
||||||
|
category = 'funny quotations, humour'
|
||||||
|
language = 'pl'
|
||||||
|
oldest_article = 15
|
||||||
|
cover_url = u'http://userlogos.org/files/logos/dzikiosiol/none_0.png'
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets= True
|
||||||
|
keep_only_tags= [dict(name='div', attrs={'class':'quote post-content post-body'})]
|
||||||
|
feeds = [(u'Cytaty', u'http://bash.org.pl/rss')]
|
@ -36,8 +36,9 @@ class BBC(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper', \
|
dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper',
|
||||||
'story-feature wide ', 'story-feature narrow']})
|
'story-feature wide ', 'story-feature narrow']}),
|
||||||
|
dict(id=['hypertab', 'comment-form']),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_attributes = ['width','height']
|
remove_attributes = ['width','height']
|
||||||
|
31
recipes/brasil_de_fato.recipe
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class BrasilDeFato(BasicNewsRecipe):
|
||||||
|
news = True
|
||||||
|
title = u'Brasil de Fato'
|
||||||
|
__author__ = 'Alex Mitrani'
|
||||||
|
description = u'Uma visão popular do Brasil e do mundo.'
|
||||||
|
publisher = u'SOCIEDADE EDITORIAL BRASIL DE FATO'
|
||||||
|
category = 'news, politics, Brazil, rss, Portuguese'
|
||||||
|
oldest_article = 10
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
summary_length = 1000
|
||||||
|
language = 'pt_BR'
|
||||||
|
|
||||||
|
remove_javascript = True
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
|
remove_empty_feeds = True
|
||||||
|
masthead_url = 'http://www.brasildefato.com.br/sites/default/files/zeropoint_logo.jpg'
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'id':'main'})]
|
||||||
|
remove_tags = [dict(name='div', attrs={'class':'links'})]
|
||||||
|
remove_tags_after = [dict(name='div', attrs={'class':'links'})]
|
||||||
|
|
||||||
|
feeds = [(u'Nacional', u'http://www.brasildefato.com.br/rss_nacional')
|
||||||
|
,(u'Internacional', u'http://www.brasildefato.com.br/rss_internacional')
|
||||||
|
,(u'Entrevista', u'http://www.brasildefato.com.br/rss_entrevista')
|
||||||
|
,(u'Cultura', u'http://www.brasildefato.com.br/rss_cultura')
|
||||||
|
,(u'Análise', u'http://www.brasildefato.com.br/rss_analise')
|
||||||
|
]
|
57
recipes/bugun_gazetesi.recipe
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Bugun (BasicNewsRecipe):
|
||||||
|
|
||||||
|
title = u'BUGÜN Gazetesi'
|
||||||
|
__author__ = u'thomass'
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed =100
|
||||||
|
#no_stylesheets = True
|
||||||
|
#delay = 1
|
||||||
|
use_embedded_content = False
|
||||||
|
encoding = 'UTF-8'
|
||||||
|
publisher = 'thomass'
|
||||||
|
category = 'news, haberler,TR,gazete'
|
||||||
|
language = 'tr'
|
||||||
|
publication_type = 'newspaper '
|
||||||
|
extra_css = ' div{font-size: small} h2{font-size: small;font-weight: bold} #ctl00_ortayer_haberBaslik{font-size:20px;font-weight: bold} '#h1{ font-size:10%;font-weight: bold} '#ctl00_ortayer_haberBaslik{ 'font-size:10%;font-weight: bold'}
|
||||||
|
#introduction{} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||||
|
conversion_options = {
|
||||||
|
'tags' : category
|
||||||
|
,'language' : language
|
||||||
|
,'publisher' : publisher
|
||||||
|
,'linearize_tables': True
|
||||||
|
}
|
||||||
|
cover_img_url = 'http://www.bugun.com.tr/images/bugunLogo2011.png'
|
||||||
|
masthead_url = 'http://www.bugun.com.tr/images/bugunLogo2011.png'
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='h1', attrs={'class':[ 'haberBaslik']}),dict(name='h2', attrs={'class':[ 'haberOzet']}), dict(name='div', attrs={'class':['haberGriDivvvv']}), dict(name='div', attrs={'id':[ 'haberTextDiv']}), ]
|
||||||
|
|
||||||
|
#keep_only_tags = [dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}) ]
|
||||||
|
#remove_tags = [ dict(name='div', attrs={'id':['news-detail-news-text-font-size','news-detail-gallery','news-detail-news-bottom-social']}),dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']})]
|
||||||
|
|
||||||
|
|
||||||
|
#remove_attributes = ['width','height']
|
||||||
|
remove_empty_feeds= True
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
( u'Son Dakika', u'http://www.bugun.com.tr/haberler.xml'),
|
||||||
|
( u'Yazarlar', u'http://www.bugun.com.tr/rss/yazarlar.xml'),
|
||||||
|
( u'Gündem', u'http://www.bugun.com.tr/rss/gundem.xml'),
|
||||||
|
( u'Ekonomi', u'http://www.bugun.com.tr/rss/ekonomi.xml'),
|
||||||
|
( u'Spor', u'http://www.bugun.com.tr/rss/spor.xml'),
|
||||||
|
( u'Magazin', u'http://www.bugun.com.tr/rss/magazin.xml'),
|
||||||
|
( u'Teknoloji', u'http://www.bugun.com.tr/rss/teknoloji.xml'),
|
||||||
|
( u'Yaşam', u'http://www.bugun.com.tr/rss/yasam.xml'),
|
||||||
|
( u'Medya', u'http://www.bugun.com.tr/rss/medya.xml'),
|
||||||
|
( u'Dünya', u'http://www.bugun.com.tr/rss/dunya.xml'),
|
||||||
|
( u'Politika', u'http://www.bugun.com.tr/rss/politika.xml'),
|
||||||
|
( u'Sağlık', u'http://www.bugun.com.tr/rss/saglik.xml'),
|
||||||
|
( u'Tarifler', u'http://www.bugun.com.tr/rss/yemek-tarifi.xml'),
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
]
|
73
recipes/cbn.recipe
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class CBN(BasicNewsRecipe):
|
||||||
|
title = u'CBN News'
|
||||||
|
__author__ = 'Roger'
|
||||||
|
# TODO: I just noticed this is downloading 25+ articles, while
|
||||||
|
# the online site is only publishing at most 7 articles daily.
|
||||||
|
# So, somehow this needs to be fixed so that it only downloads at most 7 articles
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
|
description = 'The Christian Broadcasting Network'
|
||||||
|
publisher = 'http://www.cbn.com/'
|
||||||
|
category = 'news, religion, spiritual, christian'
|
||||||
|
language = 'en'
|
||||||
|
|
||||||
|
# Make article titles, author and date bold, italic or small font.
|
||||||
|
# TODO: Could use a smaller title text
|
||||||
|
# TODO: Italicize Author and Publisher?
|
||||||
|
#
|
||||||
|
# http://www.cbn.com/App_Themes/Common/base.css,
|
||||||
|
# http://www.cbn.com/App_Themes/CBNNews/article.css",
|
||||||
|
# ... and many more style sheets.
|
||||||
|
#extra_css = '''
|
||||||
|
# .story_item_headline { font-size: medium; font-weight: bold; }
|
||||||
|
# .story_item_author { font-size: small; font-style:italic; }
|
||||||
|
# .signature_line { font-size: small; }
|
||||||
|
# '''
|
||||||
|
|
||||||
|
remove_javascript = True
|
||||||
|
use_embedded_content = False
|
||||||
|
no_stylesheets = True
|
||||||
|
language = 'en'
|
||||||
|
encoding = 'iso-8859-1'
|
||||||
|
conversion_options = {'linearize_tables':True}
|
||||||
|
|
||||||
|
# TODO: No masthead_url for CBN, using one I grepped from a news article
|
||||||
|
# (There's a better/higher contrast blue on white background image, but
|
||||||
|
# can't get it or it's too big -- embedded into a larger jpeg?)
|
||||||
|
masthead_url = 'http://www.cbn.com/templates/images/cbn_com_logo.jpg'
|
||||||
|
|
||||||
|
keep_only_tags = [
|
||||||
|
dict(name='h1', attrs={'id':'articleTitle'}),
|
||||||
|
dict(name='div', attrs={'class':'articleAuthor'}),
|
||||||
|
dict(name='div', attrs={'class':'articleDate'}),
|
||||||
|
dict(name='div', attrs={'class':'articleText'}),
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
# The article image is usually Adobe Flash Player Image
|
||||||
|
# The snapshot .jpg image files of the video are found
|
||||||
|
# within a URL folder named "PageFiles_Files"
|
||||||
|
# Filter this for now.
|
||||||
|
# (Majority of images seem to be Adobe Flash.)
|
||||||
|
dict(name='div', attrs={'class':'articleImage'}),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# Comment-out or uncomment any of the following RSS feeds according to your
|
||||||
|
# liking.
|
||||||
|
# A full list can be found here: http://www.cbn.com/rss.aspx
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'World', u'http://www.cbn.com/cbnnews/world/feed/'),
|
||||||
|
(u'US', u'http://www.cbn.com/cbnnews/us/feed/'),
|
||||||
|
(u'Inside Israel', u'http://www.cbn.com/cbnnews/insideisrael/feed/'),
|
||||||
|
(u'Politics', u'http://www.cbn.com/cbnnews/politics/feed/'),
|
||||||
|
(u'Christian World News', u'http://www.cbn.com/cbnnews/shows/cwn/feed/'),
|
||||||
|
(u'Health and Science', u'http://www.cbn.com/cbnnews/healthscience/feed/'),
|
||||||
|
(u'Finance', u'http://www.cbn.com/cbnnews/finance/feed/'),
|
||||||
|
]
|
||||||
|
|
16
recipes/cd_action.recipe
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class CD_Action(BasicNewsRecipe):
|
||||||
|
title = u'CD-Action'
|
||||||
|
__author__ = 'fenuks'
|
||||||
|
description = 'cdaction.pl - polish magazine about games site'
|
||||||
|
category = 'games'
|
||||||
|
language = 'pl'
|
||||||
|
oldest_article = 8
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets= True
|
||||||
|
cover_url =u'http://s.cdaction.pl/obrazki/logo-CD-Action_172k9.JPG'
|
||||||
|
keep_only_tags= dict(id='news_content')
|
||||||
|
remove_tags_after= dict(name='div', attrs={'class':'tresc'})
|
||||||
|
feeds = [(u'Newsy', u'http://www.cdaction.pl/rss_newsy.xml')]
|
@ -28,11 +28,12 @@ class CNN(BasicNewsRecipe):
|
|||||||
(re.compile(r'<style.*?</style>', re.DOTALL), lambda m: ''),
|
(re.compile(r'<style.*?</style>', re.DOTALL), lambda m: ''),
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [dict(id='cnnContentContainer')]
|
keep_only_tags = [dict(id=['cnnContentContainer', 'storycontent'])]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
{'class':['cnn_strybtntools', 'cnn_strylftcntnt',
|
{'class':['cnn_strybtntools', 'cnn_strylftcntnt',
|
||||||
'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
|
'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
|
||||||
'cnn_strycntntrgt']},
|
'cnn_strycntntrgt', 'hed_side', 'foot']},
|
||||||
|
dict(id=['ie_column']),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
47
recipes/cvecezla.recipe
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
cvecezla.wordpress.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class CveceZla(BasicNewsRecipe):
|
||||||
|
title = 'Cvece zla i naopakog'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = 'Haoticnost razmisljanja poradja haoticnost pisanja. Muzika, stripovi, igre, knjige, generalno glupiranje...'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
language = 'sr'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
|
publication_type = 'blog'
|
||||||
|
extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{display: block } '
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : 'igre, muzika, film, blog, Srbija'
|
||||||
|
, 'publisher': 'Mehmet Krljic'
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
|
remove_tags_before = dict(attrs={'class':'navigation'})
|
||||||
|
remove_tags_after = dict(attrs={'class':'commentlist'})
|
||||||
|
remove_tags = [
|
||||||
|
dict(attrs={'class':['postmetadata alt','sharedaddy sharedaddy-dark sd-like-enabled sd-sharing-enabled','reply','navigation']})
|
||||||
|
,dict(attrs={'id':'respond'})
|
||||||
|
]
|
||||||
|
|
||||||
|
feeds = [(u'Clanci', u'http://cvecezla.wordpress.com/feed/')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
|
21
recipes/dobreprogamy.recipe
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class Dobreprogramy_pl(BasicNewsRecipe):
|
||||||
|
title = 'Dobreprogramy.pl'
|
||||||
|
__author__ = 'fenuks'
|
||||||
|
__licence__ ='GPL v3'
|
||||||
|
category = 'IT'
|
||||||
|
language = 'pl'
|
||||||
|
cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
|
||||||
|
description = u'Aktualności i blogi z dobreprogramy.pl'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
no_stylesheets = True
|
||||||
|
language = 'pl'
|
||||||
|
extra_css = '.title {font-size:22px;}'
|
||||||
|
oldest_article = 8
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'class':['mainBar', 'newsContent', 'postTitle title', 'postInfo', 'contentText', 'content']})]
|
||||||
|
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
|
||||||
|
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
|
113
recipes/fairbanks_daily.recipe
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class FairbanksDailyNewsminer(BasicNewsRecipe):
|
||||||
|
title = u'Fairbanks Daily News-miner'
|
||||||
|
__author__ = 'Roger'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
|
description = 'The voice of interior Alaska since 1903'
|
||||||
|
publisher = 'http://www.newsminer.com/'
|
||||||
|
category = 'news, Alaska, Fairbanks'
|
||||||
|
language = 'en'
|
||||||
|
|
||||||
|
# Make article titles, author and date bold, italic or small font.
|
||||||
|
# http://assets.matchbin.com/sites/635/stylesheets/newsminer.com.css
|
||||||
|
# (signature_line contains date, views, comments)
|
||||||
|
extra_css = '''
|
||||||
|
.story_item_headline { font-size: medium; font-weight: bold; }
|
||||||
|
.story_item_author { font-size: small; font-style:italic; }
|
||||||
|
.signature_line { font-size: small; }
|
||||||
|
'''
|
||||||
|
|
||||||
|
remove_javascript = True
|
||||||
|
use_embedded_content = False
|
||||||
|
no_stylesheets = True
|
||||||
|
language = 'en'
|
||||||
|
encoding = 'utf8'
|
||||||
|
conversion_options = {'linearize_tables':True}
|
||||||
|
|
||||||
|
# TODO: The News-miner cover image seems a bit small. Can this be enlarged by 10-30%?
|
||||||
|
masthead_url = 'http://d2uh5w9wm14i0w.cloudfront.net/sites/635/assets/top_masthead_-_menu_pic.jpg'
|
||||||
|
|
||||||
|
|
||||||
|
# In order to omit seeing number of views, number of posts and the pipe
|
||||||
|
# symbol for divider after the title and date of the article, a regex or
|
||||||
|
# manual processing is needed to get just the "story_item_date updated"
|
||||||
|
# (which contains the date). Everything else on this line is pretty much not needed.
|
||||||
|
#
|
||||||
|
# Currently, you will see the following:
|
||||||
|
# | Aug 24, 2011 | 654 views | 6 | |
|
||||||
|
# (i.e. 6 comments)
|
||||||
|
#
|
||||||
|
# HTML line containing story_item_date:
|
||||||
|
# <div class="signature_line"><span title="2011-08-22T23:37:14Z" class="story_item_date updated">Aug 22, 2011</span> | 2370 views | 52 <a href="/pages/full_story/push?article-Officials+tout+new+South+Cushman+homeless+living+facility%20&id=15183753#comments_15183753"><img alt="52 comments" class="dont_touch_me" src="http://d2uh5w9wm14i0w.cloudfront.net/images/comments-icon.gif" title="52 comments" /></a> | <span id="number_recommendations_15183753" class="number_recommendations">9</span> <a href="#1" id="recommend_link_15183753" onclick="Element.remove('recommend_link_15183753'); new Ajax.Request('/community/content/recommend/15183753', {asynchronous:true, evalScripts:true}); return false;"><img alt="9 recommendations" class="dont_touch_me" src="http://d2uh5w9wm14i0w.cloudfront.net/images/thumbs-up-icon.gif" title="9 recommendations" /></a> | <a href="#1" onclick="$j.facebox({ajax: '/community/content/email_friend_pane/15183753'}); return false;"><span style="position: relative;"><img alt="email to a friend" class="dont_touch_me" src="http://d2uh5w9wm14i0w.cloudfront.net/images/email-this.gif" title="email to a friend" /></span></a> | <span><a href="/printer_friendly/15183753" target="_blank"><img alt="print" class="dont_touch_me" src="http://d2uh5w9wm14i0w.cloudfront.net/images/print_icon.gif" title="print" /></a></span><span id="email_content_message_15183753" class="signature_email_message"></span></div>
|
||||||
|
|
||||||
|
# The following was suggested, but it looks like I also need to define self & soup
|
||||||
|
# (as well as bring in extra soup dependencies?)
|
||||||
|
#date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'}))
|
||||||
|
|
||||||
|
#preprocess_regexps = [(re.compile(r'<span[^>]*addthis_separator*>'), lambda match: '') ]
|
||||||
|
#preprocess_regexps = [(re.compile(r'span class="addthis_separator">|</span>'), lambda match: '') ]
|
||||||
|
|
||||||
|
#preprocess_regexps = [
|
||||||
|
# (re.compile(r'<start>.*?<end>', re.IGNORECASE | re.DOTALL), lambda match : ''),
|
||||||
|
# ]
|
||||||
|
|
||||||
|
#def get_browser(self):
|
||||||
|
#def preprocess_html(soup, first_fetch):
|
||||||
|
# date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'}))
|
||||||
|
# return
|
||||||
|
|
||||||
|
#preprocess_regexps = [(re.compile(r' |.*?', re.DOTALL), lambda m: '')]
|
||||||
|
|
||||||
|
|
||||||
|
keep_only_tags = [
|
||||||
|
#dict(name='div', attrs={'class':'hnews hentry item'}),
|
||||||
|
dict(name='div', attrs={'class':'story_item_headline entry-title'}),
|
||||||
|
#dict(name='div', attrs={'class':'story_item_author'}),
|
||||||
|
#dict(name='span', attrs={'class':'story_item_date updated'}),
|
||||||
|
#dict(name='div', attrs={'class':'story_item_author'}),
|
||||||
|
dict(name='div', attrs={'class':'full_story'})
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
# Try getting rid of some signature_line (date line) stuff
|
||||||
|
#dict(name='img', attrs={'alt'}),
|
||||||
|
dict(name='img', attrs={'class':'dont_touch_me'}),
|
||||||
|
dict(name='span', attrs={'class':'number_recommendations'}),
|
||||||
|
#dict(name='div', attrs={'class':'signature_line'}),
|
||||||
|
|
||||||
|
# Removes div within <!-- AddThis Button BEGIN --> <!-- AddThis Button END -->
|
||||||
|
dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'}),
|
||||||
|
|
||||||
|
dict(name='div', attrs={'class':'related_content'}),
|
||||||
|
dict(name='div', attrs={'id':'comments_container'})
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# Comment-out or uncomment any of the following RSS feeds according to your
|
||||||
|
# liking.
|
||||||
|
#
|
||||||
|
# TODO: Some random bits of text might be trailing the last page (or TOC on
|
||||||
|
# MOBI files), these are bits of public posts and comments and need to also
|
||||||
|
# be removed.
|
||||||
|
#
|
||||||
|
feeds = [
|
||||||
|
(u'Alaska News', u'http://newsminer.com/rss/rss_feeds/alaska_news?content_type=article&tags=alaska_news&page_name=rss_feeds&instance=alaska_news'),
|
||||||
|
(u'Local News', u'http://newsminer.com/rss/rss_feeds/local_news?content_type=article&tags=local_news&page_name=rss_feeds&offset=0&instance=local_news'),
|
||||||
|
(u'Business', u'http://newsminer.com/rss/rss_feeds/business_news?content_type=article&tags=business_news&page_name=rss_feeds&instance=business_news'),
|
||||||
|
(u'Politics', u'http://newsminer.com/rss/rss_feeds/politics_news?content_type=article&tags=politics_news&page_name=rss_feeds&instance=politics_news'),
|
||||||
|
(u'Sports', u'http://newsminer.com/rss/rss_feeds/sports_news?content_type=article&tags=sports_news&page_name=rss_feeds&instance=sports_news'),
|
||||||
|
(u'Latitude 65 feed', u'http://newsminer.com/rss/rss_feeds/latitude_65?content_type=article&tags=latitude_65&page_name=rss_feeds&offset=0&instance=latitude_65'),
|
||||||
|
#(u'Sundays', u'http://newsminer.com/rss/rss_feeds/Sundays?content_type=article&tags=alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Sundays'),
|
||||||
|
(u'Outdoors', u'http://newsminer.com/rss/rss_feeds/Outdoors?content_type=article&tags=outdoors&page_name=rss_feeds&instance=Outdoors'),
|
||||||
|
#(u'Fairbanks Grizzlies', u'http://newsminer.com/rss/rss_feeds/fairbanks_grizzlies?content_type=article&tags=fairbanks_grizzlies&page_name=rss_feeds&instance=fairbanks_grizzlies'),
|
||||||
|
#(u'Newsminer', u'http://newsminer.com/rss/rss_feeds/Newsminer?content_type=article&tags=ted_stevens_bullets+ted_stevens+sports_news+business_news+fairbanks_grizzlies+dermot_cole_column+outdoors+alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Newsminer'),
|
||||||
|
(u'Opinion', u'http://newsminer.com/rss/rss_feeds/Opinion?content_type=article&tags=editorials&page_name=rss_feeds&instance=Opinion'),
|
||||||
|
(u'Youth', u'http://newsminer.com/rss/rss_feeds/Youth?content_type=article&tags=youth&page_name=rss_feeds&instance=Youth'),
|
||||||
|
#(u'Dermot Cole Blog', u'http://newsminer.com/rss/rss_feeds/dermot_cole_blog+rss?content_type=blog+entry&sort_by=posted_on&user_ids=3015275&page_name=blogs_dermot_cole&limit=10&instance=dermot_cole_blog+rss'),
|
||||||
|
(u'Dermot Cole Column', u'http://newsminer.com/rss/rss_feeds/Dermot_Cole_column?content_type=article&tags=dermot_cole_column&page_name=rss_feeds&instance=Dermot_Cole_column'),
|
||||||
|
#(u'Sarah Palin', u'http://newsminer.com/rss/rss_feeds/sarah_palin?content_type=article&tags=palin_in_the_news+palin_on_the_issues&page_name=rss_feeds&tag_inclusion=or&instance=sarah_palin')
|
||||||
|
]
|
||||||
|
|
40
recipes/film_web.recipe
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Filmweb_pl(BasicNewsRecipe):
    # Recipe for filmweb.pl: news and review RSS feeds.
    title = u'FilmWeb'
    __author__ = 'fenuks'
    description = 'FilmWeb - biggest polish movie site'
    cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png'
    category = 'movies'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    extra_css = '.hdrBig {font-size:22px;}'

    # Page furniture dropped from every article.
    remove_tags = [
        dict(name='div', attrs={'class': ['recommendOthers']}),
        dict(name='ul', attrs={'class': 'fontSizeSet'}),
    ]

    # Only the headline and the news/review body are kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'hdrBig'}),
        dict(name='div', attrs={'class': ['newsInfo', 'reviewContent fontSizeCont description']}),
    ]

    feeds = [
        (u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
        (u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
        (u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
        (u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'),
        (u'News / Box office', u'http://www.filmweb.pl/feed/news/category/boxoffice'),
        (u'News / Multimedia', u'http://www.filmweb.pl/feed/news/category/multimedia'),
        (u'News / Dystrybucja dvd / blu-ray', u'http://www.filmweb.pl/feed/news/category/video'),
        (u'News / Dystrybucja kinowa', u'http://www.filmweb.pl/feed/news/category/cinema'),
        (u'News / off', u'http://www.filmweb.pl/feed/news/category/off'),
        (u'News / Gry wideo', u'http://www.filmweb.pl/feed/news/category/game'),
        (u'News / Organizacje branżowe', u'http://www.filmweb.pl/feed/news/category/organizations'),
        (u'News / Internet', u'http://www.filmweb.pl/feed/news/category/internet'),
        (u'News / Różne', u'http://www.filmweb.pl/feed/news/category/other'),
        (u'News / Kino polskie', u'http://www.filmweb.pl/feed/news/category/polish.cinema'),
        (u'News / Telewizja', u'http://www.filmweb.pl/feed/news/category/tv'),
        (u'Recenzje redakcji', u'http://www.filmweb.pl/feed/reviews/latest'),
        (u'Recenzje użytkowników', u'http://www.filmweb.pl/feed/user-reviews/latest'),
    ]

    def skip_ad_pages(self, soup):
        """Return the real page when ``soup`` is a welcome/ad interstitial.

        Looks for the ``a.welcomeScreenButton`` link.  When it is present,
        the page it points to is fetched with ``index_to_soup(..., raw=True)``
        and returned; otherwise ``None`` is returned so the page is used
        as-is.
        """
        button = soup.find('a', attrs={'class': 'welcomeScreenButton'})
        # Ordinary article pages have no such button; the lookup must be
        # checked *before* reading the attribute, otherwise None['href']
        # raises TypeError on every regular page.
        if button is None:
            return None
        target = button.get('href')
        if not target:
            return None
        return self.index_to_soup(target, raw=True)
|
@ -5,6 +5,7 @@ www.ft.com/uk-edition
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
@ -22,8 +23,11 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
|
articles_are_obfuscated = True
|
||||||
|
temp_files = []
|
||||||
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
|
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
|
||||||
LOGIN = 'https://registration.ft.com/registration/barrier/login'
|
LOGIN = 'https://registration.ft.com/registration/barrier/login'
|
||||||
|
LOGIN2 = 'http://media.ft.com/h/subs3.html'
|
||||||
INDEX = 'http://www.ft.com/uk-edition'
|
INDEX = 'http://www.ft.com/uk-edition'
|
||||||
PREFIX = 'http://www.ft.com'
|
PREFIX = 'http://www.ft.com'
|
||||||
|
|
||||||
@ -39,14 +43,19 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
br.open(self.INDEX)
|
br.open(self.INDEX)
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open(self.LOGIN)
|
br.open(self.LOGIN2)
|
||||||
br.select_form(name='loginForm')
|
br.select_form(name='loginForm')
|
||||||
br['username'] = self.username
|
br['username'] = self.username
|
||||||
br['password'] = self.password
|
br['password'] = self.password
|
||||||
br.submit()
|
br.submit()
|
||||||
return br
|
return br
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
|
keep_only_tags = [
|
||||||
|
dict(name='div', attrs={'class':['fullstory fullstoryHeader', 'ft-story-header']})
|
||||||
|
,dict(name='div', attrs={'class':'standfirst'})
|
||||||
|
,dict(name='div', attrs={'id' :'storyContent'})
|
||||||
|
,dict(name='div', attrs={'class':['ft-story-body','index-detail']})
|
||||||
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':'floating-con'})
|
dict(name='div', attrs={'id':'floating-con'})
|
||||||
,dict(name=['meta','iframe','base','object','embed','link'])
|
,dict(name=['meta','iframe','base','object','embed','link'])
|
||||||
@ -68,18 +77,23 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_artlinks(self, elem):
|
def get_artlinks(self, elem):
|
||||||
articles = []
|
articles = []
|
||||||
|
count = 0
|
||||||
for item in elem.findAll('a',href=True):
|
for item in elem.findAll('a',href=True):
|
||||||
|
count = count + 1
|
||||||
|
if self.test and count > 2:
|
||||||
|
return articles
|
||||||
rawlink = item['href']
|
rawlink = item['href']
|
||||||
if rawlink.startswith('http://'):
|
if rawlink.startswith('http://'):
|
||||||
url = rawlink
|
url = rawlink
|
||||||
else:
|
else:
|
||||||
url = self.PREFIX + rawlink
|
url = self.PREFIX + rawlink
|
||||||
|
urlverified = self.browser.open_novisit(url).geturl() # resolve redirect.
|
||||||
title = self.tag_to_string(item)
|
title = self.tag_to_string(item)
|
||||||
date = strftime(self.timefmt)
|
date = strftime(self.timefmt)
|
||||||
articles.append({
|
articles.append({
|
||||||
'title' :title
|
'title' :title
|
||||||
,'date' :date
|
,'date' :date
|
||||||
,'url' :url
|
,'url' :urlverified
|
||||||
,'description':''
|
,'description':''
|
||||||
})
|
})
|
||||||
return articles
|
return articles
|
||||||
@ -96,7 +110,11 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
st = wide.find('h4',attrs={'class':'section-no-arrow'})
|
st = wide.find('h4',attrs={'class':'section-no-arrow'})
|
||||||
if st:
|
if st:
|
||||||
strest.insert(0,st)
|
strest.insert(0,st)
|
||||||
|
count = 0
|
||||||
for item in strest:
|
for item in strest:
|
||||||
|
count = count + 1
|
||||||
|
if self.test and count > 2:
|
||||||
|
return feeds
|
||||||
ftitle = self.tag_to_string(item)
|
ftitle = self.tag_to_string(item)
|
||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
|
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
|
||||||
feedarts = self.get_artlinks(item.parent.ul)
|
feedarts = self.get_artlinks(item.parent.ul)
|
||||||
@ -136,3 +154,18 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
cdate -= datetime.timedelta(days=1)
|
cdate -= datetime.timedelta(days=1)
|
||||||
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
|
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
|
||||||
|
|
||||||
|
def get_obfuscated_article(self, url):
    """Download ``url`` and return the path of a temp file holding its HTML.

    The download is attempted up to 10 times.  If every attempt fails the
    last download error is re-raised, instead of falling through with no
    data to write.  The temporary file is appended to ``self.temp_files``.
    """
    max_attempts = 10
    html = None
    for attempt in range(1, max_attempts + 1):
        try:
            html = self.browser.open(url).read()
            break
        except Exception:
            # Out of retries: surface the real error to the caller.
            if attempt == max_attempts:
                raise
            print("Retrying download...")

    tmp = PersistentTemporaryFile('_fa.html')
    self.temp_files.append(tmp)
    tmp.write(html)
    tmp.close()
    return tmp.name
|
||||||
|
|
39
recipes/fluter_de.recipe
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Fetch fluter.de
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1313693926(BasicNewsRecipe):
    # Recipe for fluter.de; purely declarative, no overridden methods.
    __author__ = 'Armin Geller' # 2011-08-19

    title = u'Fluter'
    description = 'fluter.de Magazin der Bundeszentrale für politische Bildung/bpb'
    language = 'de'
    encoding = 'UTF-8'

    oldest_article = 7
    max_articles_per_feed = 50

    # Single feed listing the magazine contents.
    feeds = [
        (u'Inhalt:', u'http://www.fluter.de/de/?tpl=907'),
    ]

    # Keep only the article text containers ...
    keep_only_tags = [
        dict(name='div', attrs={'class': ["grid_8 articleText"]}),
        dict(name='div', attrs={'class': ["articleTextInnerText"]}),
    ]

    # ... and strip the comment section and comment links from them.
    remove_tags = [
        dict(name='div', attrs={'id': ["comments"]}),
        dict(attrs={'class': ['commentlink']}),
    ]

    extra_css = '.cs_img {margin-right: 10pt;}'
|
||||||
|
|
@ -1,3 +1,4 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
|
||||||
@ -16,7 +17,7 @@ class FolhaOnline(BasicNewsRecipe):
|
|||||||
news = True
|
news = True
|
||||||
|
|
||||||
title = u'Folha de S\xE3o Paulo'
|
title = u'Folha de S\xE3o Paulo'
|
||||||
__author__ = 'Euler Alves'
|
__author__ = 'Euler Alves and Alex Mitrani'
|
||||||
description = u'Brazilian news from Folha de S\xE3o Paulo'
|
description = u'Brazilian news from Folha de S\xE3o Paulo'
|
||||||
publisher = u'Folha de S\xE3o Paulo'
|
publisher = u'Folha de S\xE3o Paulo'
|
||||||
category = 'news, rss'
|
category = 'news, rss'
|
||||||
@ -62,37 +63,50 @@ class FolhaOnline(BasicNewsRecipe):
|
|||||||
,dict(name='div',
|
,dict(name='div',
|
||||||
attrs={'class':[
|
attrs={'class':[
|
||||||
'openBox adslibraryArticle'
|
'openBox adslibraryArticle'
|
||||||
|
,'toolbar'
|
||||||
]})
|
]})
|
||||||
|
|
||||||
,dict(name='a')
|
,dict(name='a')
|
||||||
,dict(name='iframe')
|
,dict(name='iframe')
|
||||||
,dict(name='link')
|
,dict(name='link')
|
||||||
,dict(name='script')
|
,dict(name='script')
|
||||||
|
,dict(name='li')
|
||||||
]
|
]
|
||||||
|
remove_tags_after = dict(name='div',attrs={'id':'articleEnd'})
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml')
|
(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml')
|
||||||
|
,(u'Cotidiano', u'http://feeds.folha.uol.com.br/folha/cotidiano/rss091.xml')
|
||||||
|
,(u'Brasil', u'http://feeds.folha.uol.com.br/folha/brasil/rss091.xml')
|
||||||
|
,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
|
||||||
|
,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml')
|
||||||
|
,(u'Mercado', u'http://feeds.folha.uol.com.br/folha/dinheiro/rss091.xml')
|
||||||
|
,(u'Saber', u'http://feeds.folha.uol.com.br/folha/educacao/rss091.xml')
|
||||||
|
,(u'Tec', u'http://feeds.folha.uol.com.br/folha/informatica/rss091.xml')
|
||||||
|
,(u'Ilustrada', u'http://feeds.folha.uol.com.br/folha/ilustrada/rss091.xml')
|
||||||
,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml')
|
,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml')
|
||||||
,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml')
|
,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml')
|
||||||
,(u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml')
|
,(u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml')
|
||||||
,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml')
|
|
||||||
,(u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml')
|
,(u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml')
|
||||||
,(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml')
|
,(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml')
|
||||||
,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
|
,(u'Esporte', u'http://feeds.folha.uol.com.br/folha/esporte/rss091.xml')
|
||||||
,(u'Pelo Mundo', u'http://feeds.folha.uol.com.br/pelomundo.folha.rssblog.uol.com.br/')
|
,(u'Zapping', u'http://feeds.folha.uol.com.br/colunas/zapping/rss091.xml')
|
||||||
,(u'Circuito integrado', u'http://feeds.folha.uol.com.br/circuitointegrado.folha.rssblog.uol.com.br/')
|
,(u'Cida Santos', u'http://feeds.folha.uol.com.br/colunas/cidasantos/rss091.xml')
|
||||||
,(u'Blog do Fred', u'http://feeds.folha.uol.com.br/blogdofred.folha.rssblog.uol.com.br/')
|
,(u'Clóvis Rossi', u'http://feeds.folha.uol.com.br/colunas/clovisrossi/rss091.xml')
|
||||||
,(u'Maria In\xEAs Dolci', u'http://feeds.folha.uol.com.br/mariainesdolci.folha.blog.uol.com.br/')
|
,(u'Eliane Cantanhêde', u'http://feeds.folha.uol.com.br/colunas/elianecantanhede/rss091.xml')
|
||||||
,(u'Eduardo Ohata', u'http://feeds.folha.uol.com.br/folha/pensata/eduardoohata/rss091.xml')
|
,(u'Fernando Canzian', u'http://feeds.folha.uol.com.br/colunas/fernandocanzian/rss091.xml')
|
||||||
,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/folha/pensata/kennedyalencar/rss091.xml')
|
,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/colunas/gilbertodimenstein/rss091.xml')
|
||||||
,(u'Eliane Catanh\xEAde', u'http://feeds.folha.uol.com.br/folha/pensata/elianecantanhede/rss091.xml')
|
,(u'Hélio Schwartsman', u'http://feeds.folha.uol.com.br/colunas/helioschwartsman/rss091.xml')
|
||||||
,(u'Fernado Canzian', u'http://feeds.folha.uol.com.br/folha/pensata/fernandocanzian/rss091.xml')
|
,(u'Humberto Luiz Peron', u'http://feeds.folha.uol.com.br/colunas/futebolnarede/rss091.xml')
|
||||||
,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/folha/pensata/gilbertodimenstein/rss091.xml')
|
,(u'João Pereira Coutinho', u'http://feeds.folha.uol.com.br/colunas/joaopereiracoutinho/rss091.xml')
|
||||||
,(u'H\xE9lio Schwartsman', u'http://feeds.folha.uol.com.br/folha/pensata/helioschwartsman/rss091.xml')
|
,(u'José Antonio Ramalho', u'http://feeds.folha.uol.com.br/colunas/canalaberto/rss091.xml')
|
||||||
,(u'Jo\xE3o Pereira Coutinho', u'http://http://feeds.folha.uol.com.br/folha/pensata/joaopereiracoutinho/rss091.xml')
|
,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/colunas/kennedyalencar/rss091.xml')
|
||||||
,(u'Luiz Caversan', u'http://http://feeds.folha.uol.com.br/folha/pensata/luizcaversan/rss091.xml')
|
,(u'Luiz Caversan', u'http://feeds.folha.uol.com.br/colunas/luizcaversan/rss091.xml')
|
||||||
,(u'S\xE9rgio Malbergier', u'http://http://feeds.folha.uol.com.br/folha/pensata/sergiomalbergier/rss091.xml')
|
,(u'Luiz Rivoiro', u'http://feeds.folha.uol.com.br/colunas/paiepai/rss091.xml')
|
||||||
,(u'Valdo Cruz', u'http://http://feeds.folha.uol.com.br/folha/pensata/valdocruz/rss091.xml')
|
,(u'Marcelo Leite', u'http://feeds.folha.uol.com.br/colunas/marceloleite/rss091.xml')
|
||||||
|
,(u'Sérgio Malbergier', u'http://feeds.folha.uol.com.br/colunas/sergiomalbergier/rss091.xml')
|
||||||
|
,(u'Sylvia Colombo', u'http://feeds.folha.uol.com.br/colunas/sylviacolombo/rss091.xml')
|
||||||
|
,(u'Valdo Cruz', u'http://feeds.folha.uol.com.br/colunas/valdocruz/rss091.xml')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
16
recipes/gram_pl.recipe
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Gram_pl(BasicNewsRecipe):
    # Recipe for gram.pl; purely declarative, no overridden methods.
    title = u'Gram.pl'
    __author__ = 'fenuks'
    description = 'Gram.pl - site about computer games'
    category = 'games'
    language = 'pl'
    cover_url = u'http://www.gram.pl/www/01/img/grampl_zima.png'

    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True

    feeds = [
        (u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
        (u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'),
    ]

    # Containers that hold article content.
    keep_only_tags = [
        dict(name='div', attrs={'class': [
            'main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post',
            'content', 'news_header', 'news_subheader', 'news_text',
        ]}),
        dict(attrs={'class': ['contentheading', 'contentpaneopen']}),
    ]

    # Elements removed from the kept containers, matched by class or id.
    remove_tags = [
        dict(name='p', attrs={'class': ['extraText', 'must-log-in']}),
        dict(attrs={'class': ['el', 'headline', 'post-info']}),
        dict(name='div', attrs={'class': [
            'twojaOcena', 'comment-body', 'comment-author vcard',
            'comment-meta commentmetadata', 'tw_button',
        ]}),
        dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title']),
    ]
|
86
recipes/hackernews.recipe
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
'''
|
||||||
|
Hacker News
|
||||||
|
'''
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
from urlparse import urlparse
|
||||||
|
|
||||||
|
class HackerNews(BasicNewsRecipe):
    # Recipe for the Hacker News front page.  Feed items link to arbitrary
    # external sites, so every article is fetched by the recipe itself
    # (articles_are_obfuscated) and stored in a temporary file.
    title = 'Hacker News'
    __author__ = 'Tom Scholl'
    description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
    publisher = 'Y Combinator'
    category = 'news, programming, it, technology'
    masthead_url = 'http://i55.tinypic.com/2u6io76.png'
    cover_url = 'http://i55.tinypic.com/2u6io76.png'
    delay = 1
    max_articles_per_feed = 30
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'utf-8'
    language = 'en'
    requires_version = (0,8,16)

    feeds = [
        (u'Hacker News', 'http://news.ycombinator.com/rss')
    ]

    temp_files = []
    articles_are_obfuscated = True

    def _fetch_html(self, url):
        """Download ``url`` with a recipe browser and return the raw body."""
        response = self.get_browser().open(url)
        try:
            return response.read()
        finally:
            # Close the response even when read() fails.
            response.close()

    def _is_image_url(self, url):
        """Return True when the path of ``url`` ends in an image extension.

        Only the path component is inspected, case-insensitively, so
        ``/pic.PNG`` and ``/pic.png?size=large`` are both recognised.
        """
        # TODO: use content-type header instead of url
        image_extensions = ('.jpg', '.png', '.svg', '.gif', '.jpeg', '.tiff', '.bmp')
        path = urlparse(url).path or ''
        return path.lower().endswith(image_extensions)

    def get_readable_content(self, url):
        """Fetch ``url`` and return the result of ``extract_readable_article``."""
        self.log('get_readable_content(' + url + ')')
        html = self._fetch_html(url)
        return self.extract_readable_article(html, url)

    def get_hn_content(self, url):
        """Fetch a news.ycombinator.com page and return its HTML unmodified."""
        self.log('get_hn_content(' + url + ')')
        # this could be improved
        return self._fetch_html(url)

    def get_obfuscated_article(self, url):
        """Store the article for ``url`` in a temp file and return its path.

        Hacker News pages are saved as downloaded, image URLs are wrapped in
        a minimal HTML page, and everything else goes through
        ``get_readable_content``.
        """
        if url.startswith('http://news.ycombinator.com'):
            content = self.get_hn_content(url)
        elif self._is_image_url(url):
            self.log('using image_content (' + url + ')')
            content = u'<html><body><img src="' + url + u'"></body></html>'
        else:
            content = self.get_readable_content(url)

        # The temp file receives bytes; encode text explicitly (matching the
        # recipe's declared utf-8 encoding) rather than relying on an
        # implicit ASCII conversion that fails on non-ASCII characters.
        if not isinstance(content, bytes):
            content = content.encode('utf-8')

        tmp = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(tmp)
        tmp.write(content)
        tmp.close()
        return tmp.name

    def is_link_wanted(self, url, tag):
        """Reject links whose URL ends in ``.pdf``; accept everything else."""
        return not url.endswith('.pdf')

    def prettyify_url(self, url):
        """Return just the hostname of ``url``."""
        return urlparse(url).hostname

    def populate_article_metadata(self, article, soup, first):
        """Use the article's source hostname as its summary."""
        article.text_summary = self.prettyify_url(article.url)
        article.summary = article.text_summary
|
||||||
|
|
||||||
|
|
@ -13,6 +13,8 @@ class HBR(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
LOGIN_URL = 'http://hbr.org/login?request_url=/'
|
LOGIN_URL = 'http://hbr.org/login?request_url=/'
|
||||||
|
LOGOUT_URL = 'http://hbr.org/logout?request_url=/'
|
||||||
|
|
||||||
INDEX = 'http://hbr.org/archive-toc/BR'
|
INDEX = 'http://hbr.org/archive-toc/BR'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', id='pageContainer')]
|
keep_only_tags = [dict(name='div', id='pageContainer')]
|
||||||
@ -34,6 +36,9 @@ class HBR(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
|
self.logout_url = None
|
||||||
|
|
||||||
|
#'''
|
||||||
br.open(self.LOGIN_URL)
|
br.open(self.LOGIN_URL)
|
||||||
br.select_form(name='signin-form')
|
br.select_form(name='signin-form')
|
||||||
br['signin-form:username'] = self.username
|
br['signin-form:username'] = self.username
|
||||||
@ -41,10 +46,13 @@ class HBR(BasicNewsRecipe):
|
|||||||
raw = br.submit().read()
|
raw = br.submit().read()
|
||||||
if 'My Account' not in raw:
|
if 'My Account' not in raw:
|
||||||
raise Exception('Failed to login, are you sure your username and password are correct?')
|
raise Exception('Failed to login, are you sure your username and password are correct?')
|
||||||
self.logout_url = None
|
try:
|
||||||
link = br.find_link(text='Sign out')
|
link = br.find_link(text='Sign out')
|
||||||
if link:
|
if link:
|
||||||
self.logout_url = link.absolute_url
|
self.logout_url = link.absolute_url
|
||||||
|
except:
|
||||||
|
self.logout_url = self.LOGOUT_URL
|
||||||
|
#'''
|
||||||
return br
|
return br
|
||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
@ -57,6 +65,8 @@ class HBR(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def hbr_get_toc(self):
|
def hbr_get_toc(self):
|
||||||
|
#return self.index_to_soup(open('/t/hbr.html').read())
|
||||||
|
|
||||||
today = date.today()
|
today = date.today()
|
||||||
future = today + timedelta(days=30)
|
future = today + timedelta(days=30)
|
||||||
for x in [x.strftime('%y%m') for x in (future, today)]:
|
for x in [x.strftime('%y%m') for x in (future, today)]:
|
||||||
@ -66,53 +76,43 @@ class HBR(BasicNewsRecipe):
|
|||||||
return soup
|
return soup
|
||||||
raise Exception('Could not find current issue')
|
raise Exception('Could not find current issue')
|
||||||
|
|
||||||
def hbr_parse_section(self, container, feeds):
|
def hbr_parse_toc(self, soup):
|
||||||
|
feeds = []
|
||||||
current_section = None
|
current_section = None
|
||||||
current_articles = []
|
articles = []
|
||||||
for x in container.findAll(name=['li', 'h3', 'h4']):
|
for x in soup.find(id='archiveToc').findAll(['h3', 'h4']):
|
||||||
if x.name in ['h3', 'h4'] and not x.findAll(True):
|
if x.name == 'h3':
|
||||||
if current_section and current_articles:
|
if current_section is not None and articles:
|
||||||
feeds.append((current_section, current_articles))
|
feeds.append((current_section, articles))
|
||||||
current_section = self.tag_to_string(x)
|
current_section = self.tag_to_string(x).capitalize()
|
||||||
current_articles = []
|
articles = []
|
||||||
self.log('\tFound section:', current_section)
|
self.log('\tFound section:', current_section)
|
||||||
if x.name == 'li':
|
else:
|
||||||
a = x.find('a', href=True)
|
a = x.find('a', href=True)
|
||||||
if a is not None:
|
if a is None: continue
|
||||||
title = self.tag_to_string(a)
|
title = self.tag_to_string(a)
|
||||||
url = a.get('href')
|
url = a['href']
|
||||||
if '/ar/' not in url:
|
if '/ar/' not in url:
|
||||||
continue
|
continue
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://hbr.org' + url
|
url = 'http://hbr.org' + url
|
||||||
url = self.map_url(url)
|
url = self.map_url(url)
|
||||||
p = x.find('p')
|
p = x.parent.find('p')
|
||||||
desc = ''
|
desc = ''
|
||||||
if p is not None:
|
if p is not None:
|
||||||
desc = self.tag_to_string(p)
|
desc = self.tag_to_string(p)
|
||||||
if not title or not url:
|
|
||||||
continue
|
|
||||||
self.log('\t\tFound article:', title)
|
self.log('\t\tFound article:', title)
|
||||||
self.log('\t\t\t', url)
|
self.log('\t\t\t', url)
|
||||||
self.log('\t\t\t', desc)
|
self.log('\t\t\t', desc)
|
||||||
current_articles.append({'title':title, 'url':url,
|
|
||||||
'description':desc, 'date':''})
|
|
||||||
if current_section and current_articles:
|
|
||||||
feeds.append((current_section, current_articles))
|
|
||||||
|
|
||||||
|
articles.append({'title':title, 'url':url, 'description':desc,
|
||||||
|
'date':''})
|
||||||
def hbr_parse_toc(self, soup):
|
|
||||||
feeds = []
|
|
||||||
features = soup.find(id='issueFeaturesContent')
|
|
||||||
self.hbr_parse_section(features, feeds)
|
|
||||||
departments = soup.find(id='issueDepartments')
|
|
||||||
self.hbr_parse_section(departments, feeds)
|
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.hbr_get_toc()
|
soup = self.hbr_get_toc()
|
||||||
|
#open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
|
||||||
feeds = self.hbr_parse_toc(soup)
|
feeds = self.hbr_parse_toc(soup)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
@ -6,33 +6,21 @@ class HBR(BasicNewsRecipe):
|
|||||||
title = 'Harvard Business Review Blogs'
|
title = 'Harvard Business Review Blogs'
|
||||||
description = 'To subscribe go to http://hbr.harvardbusiness.org'
|
description = 'To subscribe go to http://hbr.harvardbusiness.org'
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
__author__ = 'Kovid Goyal, enhanced by BrianG'
|
__author__ = 'Kovid Goyal'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
LOGIN_URL = 'http://hbr.org/login?request_url=/'
|
LOGIN_URL = 'http://hbr.org/login?request_url=/'
|
||||||
|
LOGOUT_URL = 'http://hbr.org/logout?request_url=/'
|
||||||
|
|
||||||
INDEX = 'http://hbr.org/current'
|
INDEX = 'http://hbr.org/current'
|
||||||
|
|
||||||
#
|
|
||||||
# Blog Stuff
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
INCLUDE_BLOGS = True
|
|
||||||
INCLUDE_ARTICLES = False
|
|
||||||
|
|
||||||
# option-specific settings.
|
|
||||||
|
|
||||||
if INCLUDE_BLOGS == True:
|
|
||||||
remove_tags_after = dict(id='articleBody')
|
remove_tags_after = dict(id='articleBody')
|
||||||
remove_tags_before = dict(id='pageFeature')
|
remove_tags_before = dict(id='pageFeature')
|
||||||
feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')]
|
feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')]
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
else:
|
|
||||||
timefmt = ' [%B %Y]'
|
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [ dict(name='div', id='pageContainer')
|
keep_only_tags = [ dict(name='div', id='pageContainer')
|
||||||
]
|
]
|
||||||
@ -41,21 +29,15 @@ class HBR(BasicNewsRecipe):
|
|||||||
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
||||||
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
||||||
'articleToolbarTop','articleToolbarBottom', 'articleToolbarRD',
|
'articleToolbarTop','articleToolbarBottom', 'articleToolbarRD',
|
||||||
'mailingListTout', 'partnerCenter', 'pageFooter']),
|
'mailingListTout', 'partnerCenter', 'pageFooter', 'shareWidgetTop']),
|
||||||
dict(name='iframe')]
|
dict(name=['iframe', 'style'])]
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
|
|
||||||
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
|
|
||||||
h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
|
|
||||||
h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
|
|
||||||
#articleBody{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
|
|
||||||
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
|
|
||||||
'''
|
|
||||||
#-------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
|
self.logout_url = None
|
||||||
|
|
||||||
|
#'''
|
||||||
br.open(self.LOGIN_URL)
|
br.open(self.LOGIN_URL)
|
||||||
br.select_form(name='signin-form')
|
br.select_form(name='signin-form')
|
||||||
br['signin-form:username'] = self.username
|
br['signin-form:username'] = self.username
|
||||||
@ -63,11 +45,15 @@ class HBR(BasicNewsRecipe):
|
|||||||
raw = br.submit().read()
|
raw = br.submit().read()
|
||||||
if 'My Account' not in raw:
|
if 'My Account' not in raw:
|
||||||
raise Exception('Failed to login, are you sure your username and password are correct?')
|
raise Exception('Failed to login, are you sure your username and password are correct?')
|
||||||
self.logout_url = None
|
try:
|
||||||
link = br.find_link(text='Sign out')
|
link = br.find_link(text='Sign out')
|
||||||
if link:
|
if link:
|
||||||
self.logout_url = link.absolute_url
|
self.logout_url = link.absolute_url
|
||||||
|
except:
|
||||||
|
self.logout_url = self.LOGOUT_URL
|
||||||
|
#'''
|
||||||
return br
|
return br
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------------------------
|
#-------------------------------------------------------------------------------------------------
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
if self.logout_url is not None:
|
if self.logout_url is not None:
|
||||||
@ -76,99 +62,7 @@ class HBR(BasicNewsRecipe):
|
|||||||
def map_url(self, url):
|
def map_url(self, url):
|
||||||
if url.endswith('/ar/1'):
|
if url.endswith('/ar/1'):
|
||||||
return url[:-1]+'pr'
|
return url[:-1]+'pr'
|
||||||
#-------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def hbr_get_toc(self):
|
|
||||||
soup = self.index_to_soup(self.INDEX)
|
|
||||||
url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href')
|
|
||||||
return self.index_to_soup('http://hbr.org'+url)
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def hbr_parse_section(self, container, feeds):
|
|
||||||
current_section = None
|
|
||||||
current_articles = []
|
|
||||||
for x in container.findAll(name=['li', 'h3', 'h4']):
|
|
||||||
if x.name in ['h3', 'h4'] and not x.findAll(True):
|
|
||||||
if current_section and current_articles:
|
|
||||||
feeds.append((current_section, current_articles))
|
|
||||||
current_section = self.tag_to_string(x)
|
|
||||||
current_articles = []
|
|
||||||
self.log('\tFound section:', current_section)
|
|
||||||
if x.name == 'li':
|
|
||||||
a = x.find('a', href=True)
|
|
||||||
if a is not None:
|
|
||||||
title = self.tag_to_string(a)
|
|
||||||
url = a.get('href')
|
|
||||||
if '/ar/' not in url:
|
|
||||||
continue
|
|
||||||
if url.startswith('/'):
|
|
||||||
url = 'http://hbr.org'+url
|
|
||||||
url = self.map_url(url)
|
|
||||||
p = x.find('p')
|
|
||||||
desc = ''
|
|
||||||
if p is not None:
|
|
||||||
desc = self.tag_to_string(p)
|
|
||||||
if not title or not url:
|
|
||||||
continue
|
|
||||||
self.log('\t\tFound article:', title)
|
|
||||||
self.log('\t\t\t', url)
|
|
||||||
self.log('\t\t\t', desc)
|
|
||||||
current_articles.append({'title':title, 'url':url,
|
|
||||||
'description':desc, 'date':''})
|
|
||||||
if current_section and current_articles:
|
|
||||||
feeds.append((current_section, current_articles))
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def hbr_parse_toc(self, soup):
    """Build the feed list from a parsed table-of-contents page.

    The elements with ids 'issueFeaturesContent' and 'issueDepartments'
    are looked up in *soup* and handed, in that order, to
    ``hbr_parse_section`` together with the list that is returned.
    """
    collected = []
    for section_id in ('issueFeaturesContent', 'issueDepartments'):
        block = soup.find(id=section_id)
        self.hbr_parse_section(block, collected)
    return collected
|
|
||||||
#-------------------------------------------------------------------------------------------------
|
|
||||||
def feed_to_index_append(self, feedObject, masterFeed):
    """Convert feed objects into index entries and append them.

    For every feed in *feedObject* a list of article dicts (keys 'title',
    'url', 'date', 'description') is built from the feed's ``articles``.
    The feed title is suffixed with the two dates returned by
    ``self.get_feed_dates(feed, '%d-%b')`` and the ``(title, articles)``
    pair is appended to *masterFeed*, which is modified in place.
    """
    for feed in feedObject:
        # Re-shape each article object into the dict form used by the index.
        entries = [
            {
                'title': item.title,
                'url': item.url,
                'date': item.date,
                'description': item.text_summary,
            }
            for item in feed.articles
        ]

        # Decorate the feed title with its date range.
        first_day, last_day = self.get_feed_dates(feed, '%d-%b')
        heading = ''.join(
            [feed.title, ' (', first_day, ' thru ', last_day, ')'])

        masterFeed.append((heading, entries))
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------------------------
|
|
||||||
def get_feed_dates(self, feedObject, dateMask):
    """Return the formatted dates of the last and first article of a feed.

    :param feedObject: object whose ``articles`` sequence holds items with
        a ``localtime`` attribute that supports ``strftime``.
    :param dateMask: ``strftime`` format applied to both dates.
    :return: ``(startDate, endDate)`` -- the date of the last article in
        the sequence followed by the date of the first one.
    :raises IndexError: if the feed has no articles.
    """
    articles = feedObject.articles
    # NOTE(review): presumably articles are ordered newest first, which is
    # why the last entry supplies the start date -- confirm with the caller.
    startDate = articles[-1].localtime.strftime(dateMask)
    endDate = articles[0].localtime.strftime(dateMask)
    return startDate, endDate
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def parse_index(self):
    """Return the index, optionally built from the magazine's contents.

    When ``self.INCLUDE_ARTICLES`` is set, the table-of-contents page is
    fetched with ``hbr_get_toc`` and converted by ``hbr_parse_toc``.
    Otherwise the stock ``BasicNewsRecipe.parse_index`` result is returned.
    """
    # Plain truthiness test instead of comparing against True.
    if not self.INCLUDE_ARTICLES:
        return BasicNewsRecipe.parse_index(self)

    soup = self.hbr_get_toc()
    return self.hbr_parse_toc(soup)
|
|
||||||
#-------------------------------------------------------------------------------------------------
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = None
|
cover_url = None
|
||||||
index = 'http://hbr.org/current'
|
index = 'http://hbr.org/current'
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
import string, pprint
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class HoustonChronicle(BasicNewsRecipe):
|
class HoustonChronicle(BasicNewsRecipe):
|
||||||
@ -13,53 +11,28 @@ class HoustonChronicle(BasicNewsRecipe):
|
|||||||
language = 'en'
|
language = 'en'
|
||||||
timefmt = ' [%a, %d %b, %Y]'
|
timefmt = ' [%a, %d %b, %Y]'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
|
remove_attributes = ['style']
|
||||||
|
|
||||||
keep_only_tags = [
|
oldest_article = 2.0
|
||||||
dict(id=['story-head', 'story'])
|
|
||||||
|
keep_only_tags = {'class':lambda x: x and ('hst-articletitle' in x or
|
||||||
|
'hst-articletext' in x or 'hst-galleryitem' in x)}
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
('News', "http://www.chron.com/rss/feed/News-270.php"),
|
||||||
|
('Sports',
|
||||||
|
'http://www.chron.com/sports/headlines/collectionRss/Sports-Headlines-Staff-Stories-10767.php'),
|
||||||
|
('Neighborhood',
|
||||||
|
'http://www.chron.com/rss/feed/Neighborhood-305.php'),
|
||||||
|
('Business', 'http://www.chron.com/rss/feed/Business-287.php'),
|
||||||
|
('Entertainment',
|
||||||
|
'http://www.chron.com/rss/feed/Entertainment-293.php'),
|
||||||
|
('Editorials',
|
||||||
|
'http://www.chron.com/opinion/editorials/collectionRss/Opinion-Editorials-Headline-List-10567.php'),
|
||||||
|
('Life', 'http://www.chron.com/rss/feed/Life-297.php'),
|
||||||
|
('Science & Tech',
|
||||||
|
'http://www.chron.com/rss/feed/AP-Technology-and-Science-266.php'),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(id=['share-module', 'resource-box',
|
|
||||||
'resource-box-header'])
|
|
||||||
]
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
|
|
||||||
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
|
|
||||||
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
|
|
||||||
h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
|
|
||||||
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
|
|
||||||
#story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
|
|
||||||
#story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
|
|
||||||
#story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
|
||||||
#story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
|
||||||
#story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
|
|
||||||
#Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
|
|
||||||
.p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
|
|
||||||
.p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
def parse_index(self):
    """Scrape the chron.com section front pages into a list of feeds.

    For each category the page ``http://www.chron.com/<category>/`` is
    loaded and every element carrying ``comptype='story'`` and a
    ``storyid`` attribute contributes the first link it contains.

    :return: list of ``(section_title, articles)`` tuples; categories
        without any article are left out.  Each article is a dict with the
        keys 'title', 'url', 'description' and 'date'.
    """
    categories = ['news', 'sports', 'business', 'entertainment', 'life',
                  'travel']
    feeds = []
    for cat in categories:
        articles = []
        soup = self.index_to_soup('http://www.chron.com/%s/' % cat)
        for elem in soup.findAll(comptype='story', storyid=True):
            a = elem.find('a', href=True)
            if a is None:
                continue
            url = a['href']
            # Absolute links (http or https) are kept as they are; anything
            # else is treated as site-relative and gets the host prefixed.
            if not url.startswith(('http://', 'https://')):
                url = 'http://www.chron.com' + url
            title = self.tag_to_string(a)
            articles.append({'title': title, 'url': url,
                             'description': '', 'date': ''})
            # Report through the recipe log rather than printing to stdout.
            self.log('\tFound article:', title, url)
        if articles:
            feeds.append((string.capwords(cat), articles))
    return feeds
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
BIN
recipes/icons/android_com_pl.png
Normal file
After Width: | Height: | Size: 1.4 KiB |
BIN
recipes/icons/bash_org_pl.png
Normal file
After Width: | Height: | Size: 391 B |
BIN
recipes/icons/cd_action.png
Normal file
After Width: | Height: | Size: 972 B |
BIN
recipes/icons/dobreprogamy.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/film_web.png
Normal file
After Width: | Height: | Size: 3.4 KiB |
BIN
recipes/icons/gram_pl.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/niebezpiecznik.png
Normal file
After Width: | Height: | Size: 795 B |
BIN
recipes/icons/wnp.png
Normal file
After Width: | Height: | Size: 576 B |
@ -7,8 +7,9 @@ latercera.com
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class LaTercera(BasicNewsRecipe):
|
class LaTercera(BasicNewsRecipe):
|
||||||
|
news = True
|
||||||
title = 'La Tercera'
|
title = 'La Tercera'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic and Alex Mitrani'
|
||||||
description = 'El sitio de noticias online de Chile'
|
description = 'El sitio de noticias online de Chile'
|
||||||
publisher = 'La Tercera'
|
publisher = 'La Tercera'
|
||||||
category = 'news, politics, Chile'
|
category = 'news, politics, Chile'
|
||||||
@ -18,7 +19,7 @@ class LaTercera(BasicNewsRecipe):
|
|||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
language = 'es'
|
language = 'es_CL'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
@ -28,28 +29,33 @@ class LaTercera(BasicNewsRecipe):
|
|||||||
, 'linearize_tables' : True
|
, 'linearize_tables' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]
|
keep_only_tags = [
|
||||||
|
dict(name='h1', attrs={'class':['titularArticulo']})
|
||||||
|
,dict(name='h4', attrs={'class':['bajadaArt']})
|
||||||
|
,dict(name='h5', attrs={'class':['autorArt']})
|
||||||
|
,dict(name='div', attrs={'class':['articleContent']})
|
||||||
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['ul','input','base'])
|
dict(name='div', attrs={'class':['boxCompartir','keywords']})
|
||||||
,dict(name='div', attrs={'id':['boxComentarios','shim','enviarAmigo']})
|
]
|
||||||
,dict(name='div', attrs={'class':['ad640','span-10 imgSet A','infoRelCol']})
|
|
||||||
,dict(name='p', attrs={'id':['mensajeError','mensajeEnviandoNoticia','mensajeExito']})
|
remove_tags_after = [
|
||||||
|
dict(name='div', attrs={'class':['keywords']})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [(u'La Tercera', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
|
||||||
(u'Noticias de ultima hora', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
|
|
||||||
,(u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680')
|
|
||||||
,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')
|
,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')
|
||||||
|
,(u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680')
|
||||||
,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
|
,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
|
||||||
,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
|
|
||||||
,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')
|
,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')
|
||||||
,(u'Entretenimiento', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=661')
|
,(u'Santiago', u'http://www.latercera.com/feed/manager?type=rss&sc=TEFURVJDRVJB&citId=9&categoryId=1731')
|
||||||
,(u'Motores', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=665')
|
|
||||||
,(u'Tendencias', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=659')
|
,(u'Tendencias', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=659')
|
||||||
,(u'Estilo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=660')
|
|
||||||
,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657')
|
,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657')
|
||||||
|
,(u'Cultura', u'http://www.latercera.com/feed/manager?type=rss&sc=TEFURVJDRVJB&citId=9&categoryId=1453')
|
||||||
|
,(u'Entretención', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=661')
|
||||||
|
,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
@ -23,14 +23,21 @@ class Liberation(BasicNewsRecipe):
|
|||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1')
|
dict(name='h1')
|
||||||
,dict(name='div', attrs={'class':'articleContent'})
|
#,dict(name='div', attrs={'class':'object-content text text-item'})
|
||||||
|
,dict(name='div', attrs={'class':'article'})
|
||||||
|
#,dict(name='div', attrs={'class':'articleContent'})
|
||||||
,dict(name='div', attrs={'class':'entry'})
|
,dict(name='div', attrs={'class':'entry'})
|
||||||
]
|
]
|
||||||
|
remove_tags_after = [ dict(name='div',attrs={'class':'toolbox extra_toolbox'}) ]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='p', attrs={'class':'clear'})
|
dict(name='p', attrs={'class':'clear'})
|
||||||
,dict(name='ul', attrs={'class':'floatLeft clear'})
|
,dict(name='ul', attrs={'class':'floatLeft clear'})
|
||||||
,dict(name='div', attrs={'class':'clear floatRight'})
|
,dict(name='div', attrs={'class':'clear floatRight'})
|
||||||
,dict(name='object')
|
,dict(name='object')
|
||||||
|
,dict(name='div', attrs={'class':'toolbox'})
|
||||||
|
,dict(name='div', attrs={'class':'cartridge cartridge-basic-bubble cat-zoneabo'})
|
||||||
|
#,dict(name='div', attrs={'class':'clear block block-call-items'})
|
||||||
|
,dict(name='div', attrs={'class':'block-content'})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
@ -6,11 +6,13 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
msdn.microsoft.com/en-us/magazine
|
msdn.microsoft.com/en-us/magazine
|
||||||
'''
|
'''
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
|
||||||
|
|
||||||
class MSDNMagazine_en(BasicNewsRecipe):
|
class MSDNMagazine_en(BasicNewsRecipe):
|
||||||
title = 'MSDN Magazine'
|
title = 'MSDN Magazine'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'The Microsoft Journal for Developers'
|
description = 'The Microsoft Journal for Developers'
|
||||||
|
masthead_url = 'http://i3.msdn.microsoft.com/Platform/MasterPages/MsdnMagazine/smalllogo.png'
|
||||||
publisher = 'Microsoft Press'
|
publisher = 'Microsoft Press'
|
||||||
category = 'news, IT, Microsoft, programming, windows'
|
category = 'news, IT, Microsoft, programming, windows'
|
||||||
oldest_article = 31
|
oldest_article = 31
|
||||||
@ -20,24 +22,44 @@ class MSDNMagazine_en(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
|
base_url = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
|
||||||
|
rss_url = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'
|
||||||
|
|
||||||
|
|
||||||
feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]
|
keep_only_tags = [dict(name='div', attrs={'id':'MainContent'})]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'navpage'})]
|
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','base','table'])
|
dict(name='div', attrs={'class':'DivRatingsOnly'})
|
||||||
,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
|
,dict(name='div', attrs={'class':'ShareThisButton4'})
|
||||||
]
|
]
|
||||||
remove_tags_after = dict(name='div', attrs={'class':'navpage'})
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def find_articles(self):
|
||||||
for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}):
|
idx_contents = self.browser.open(self.rss_url).read()
|
||||||
item.name="h2"
|
idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
|
||||||
for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}):
|
|
||||||
item.name="h1"
|
for article in idx.findAll('item'):
|
||||||
for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}):
|
desc_html = self.tag_to_string(article.find('description'))
|
||||||
item.name="h3"
|
description = self.tag_to_string(BeautifulSoup(desc_html))
|
||||||
return soup
|
|
||||||
|
a = {
|
||||||
|
'title': self.tag_to_string(article.find('title')),
|
||||||
|
'url': self.tag_to_string(article.find('link')),
|
||||||
|
'description': description,
|
||||||
|
'date' : self.tag_to_string(article.find('pubdate')),
|
||||||
|
}
|
||||||
|
yield a
|
||||||
|
|
||||||
|
|
||||||
|
def parse_index(self):
    """Build a single feed named after the current magazine issue.

    The page at ``self.base_url`` supplies the issue name (text of the
    first ``h1``).  If that page has an image whose ``alt`` equals the
    issue name, its ``src`` is stored in ``self.cover_url``.  The articles
    are whatever ``self.find_articles()`` yields.

    :return: one-element list ``[(issue_name, articles)]``.
    """
    front_page = self.index_to_soup(self.base_url)

    # Issue name, e.g. "August 2011"
    issue_name = self.tag_to_string(front_page.find('h1'))

    # The cover picture is the image labelled with the issue name.
    cover = front_page.find('img', attrs={'alt': issue_name})
    if cover is not None:
        self.cover_url = cover['src']

    articles = list(self.find_articles())
    return [(issue_name, articles)]
|
||||||
|
|
||||||
|
16
recipes/niebezpiecznik.recipe
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Niebezpiecznik_pl(BasicNewsRecipe):
    # Feed-based recipe for Niebezpiecznik.pl (language 'pl').

    # Identification
    title = u'Niebezpiecznik.pl'
    __author__ = 'fenuks'
    description = 'Niebezpiecznik.pl'
    category = 'hacking, IT'
    language = 'pl'
    cover_url = u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png'

    # Download limits
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True

    # Page clean-up: keep the 'title' and 'entry' divs, then drop the
    # 'sociable' divs, every h4 and anything classed 'similar-posts'.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['title', 'entry']}},
    ]
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['sociable']}},
        {'name': 'h4'},
        {'attrs': {'class': 'similar-posts'}},
    ]

    feeds = [
        (u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'),
        ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/'),
    ]
|
@ -5,7 +5,6 @@ class PolitiFactCom(BasicNewsRecipe):
|
|||||||
__author__ = u'Michael Heinz'
|
__author__ = u'Michael Heinz'
|
||||||
oldest_article = 21
|
oldest_article = 21
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
recursion = 0
|
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -27,4 +26,7 @@ class PolitiFactCom(BasicNewsRecipe):
|
|||||||
(u'Statements', u'http://www.politifact.com/feeds/statements/truth-o-meter/')
|
(u'Statements', u'http://www.politifact.com/feeds/statements/truth-o-meter/')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_browser(self):
    """Return the stock recipe browser with gzip handling switched on."""
    browser = BasicNewsRecipe.get_browser(self)
    browser.set_handle_gzip(True)
    return browser
|
||||||
|
@ -25,11 +25,13 @@ class Reuters(BasicNewsRecipe):
|
|||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'column2 gridPanel grid8'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'column2 gridPanel grid8'})]
|
||||||
|
|
||||||
|
|
||||||
remove_tags = [dict(name='div', attrs={'id':['recommendedArticles','relatedNews','relatedVideo','relatedFactboxes']}),
|
remove_tags = [dict(name='div', attrs={'id':['recommendedArticles','relatedNews','relatedVideo','relatedFactboxes']}),
|
||||||
dict(name='p', attrs={'class':['relatedTopics']}),
|
dict(name='p', attrs={'class':['relatedTopics']}),
|
||||||
dict(name='a', attrs={'id':['fullSizeLink']}),
|
dict(name='a', attrs={'id':['fullSizeLink']}),
|
||||||
dict(name='div', attrs={'class':['photoNav','relatedTopicButtons','articleComments','gridPanel grid8','footerHalf gridPanel grid1','gridPanel grid2','gridPanel grid3']}),]
|
dict(name='div', attrs={'class':['photoNav','relatedTopicButtons','articleComments','gridPanel grid8','footerHalf gridPanel grid1','gridPanel grid2','gridPanel grid3']}),
|
||||||
|
# Remove the Tweet, Share this, Email and Print links below article title too!
|
||||||
|
dict(name='div', attrs={'class':['columnRight']}),
|
||||||
|
]
|
||||||
|
|
||||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||||
[
|
[
|
||||||
@ -40,9 +42,7 @@ class Reuters(BasicNewsRecipe):
|
|||||||
(r'<body>.*?<div class="contentBand">', lambda match : '<body>'),
|
(r'<body>.*?<div class="contentBand">', lambda match : '<body>'),
|
||||||
(r'<h3>Share:</h3>.*?</body>', lambda match : '<!-- END:: Shared Module id=36615 --></body>'),
|
(r'<h3>Share:</h3>.*?</body>', lambda match : '<!-- END:: Shared Module id=36615 --></body>'),
|
||||||
(r'<div id="atools" class="articleTools">.*?<div class="linebreak">', lambda match : '<div class="linebreak">'),
|
(r'<div id="atools" class="articleTools">.*?<div class="linebreak">', lambda match : '<div class="linebreak">'),
|
||||||
]
|
]]
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
feeds = [ ('Top Stories', 'http://feeds.reuters.com/reuters/topNews?format=xml'),
|
feeds = [ ('Top Stories', 'http://feeds.reuters.com/reuters/topNews?format=xml'),
|
||||||
@ -54,4 +54,3 @@ class Reuters(BasicNewsRecipe):
|
|||||||
('Technology News', 'http://feeds.reuters.com/reuters/technologyNews?format=xml'),
|
('Technology News', 'http://feeds.reuters.com/reuters/technologyNews?format=xml'),
|
||||||
('Oddly Enough News', 'http://feeds.reuters.com/reuters/oddlyEnoughNews?format=xml')
|
('Oddly Enough News', 'http://feeds.reuters.com/reuters/oddlyEnoughNews?format=xml')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
63
recipes/samanyolu_haber.recipe
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class SHaber(BasicNewsRecipe):
    # Feed-based recipe for the Samanyolu Haber site (language 'tr').

    # Identification
    title = u'Samanyolu Haber'
    __author__ = u'thomass'
    description = ' Samanyolu Haber Sitesinden günlük haberler '
    publisher = 'thomass'
    category = 'güncel, haber, türkçe'
    language = 'tr'
    publication_type = 'newspaper'

    # Download settings
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    remove_empty_feeds = True

    # Conversion metadata re-uses the values declared above.
    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # Adjacent literals are joined by the parser into one stylesheet string.
    extra_css = (
        ' .Haber-Baslik-Yazisi {font-weight: bold; font-size: 9px}'
        ' .Haber-Ozet-Yazisi{ font-family:sans-serif;font-weight: normal;font-size: 11px }'
        ' #Haber{ font-family:sans-serif;font-weight: normal;font-size: 9px }'
        '.KirmiziText{ font-weight: normal;font-size: 5px }'
    )

    # Only the headline, summary, image and body containers are kept.
    keep_only_tags = [
        dict(name='div',
             attrs={'class': ['Haber-Baslik-Yazisi', 'Haber-Ozet-Yazisi']}),
        dict(name='div',
             attrs={'id': ['ctl00_ContentPlaceHolder1_imagenew', 'Haber']}),
    ]

    # The same logo is declared under both names.
    cover_img_url = 'http://static.samanyoluhaber.com/Images/resources/images/samanyoluhaber-yazi-logo.png'
    masthead_url = cover_img_url

    feeds = [
        (u'Son Dakika', u'http://podcast.samanyoluhaber.com/sondakika.rss'),
        (u'Gündem', u'http://podcast.samanyoluhaber.com/gundem.rss'),
        (u'Politika ', u'http://podcast.samanyoluhaber.com/politika.rss'),
        (u'Ekonomi', u'http://podcast.samanyoluhaber.com/ekonomi.rss'),
        (u'Dünya', u'http://podcast.samanyoluhaber.com/dunya.rss'),
        (u'Spor ', u'http://podcast.samanyoluhaber.com/spor.rss'),
        (u'Sağlık', u'http://podcast.samanyoluhaber.com/saglik.rss'),
        (u'Kültür', u'http://podcast.samanyoluhaber.com/kultur.rss'),
        # disabled: (u'Teknoloji ', u'http://podcast.samanyoluhaber.com/teknoloji.rss')
        (u'Eğitim', u'http://podcast.samanyoluhaber.com/egitim.rss'),
        (u'Ramazan', u'http://podcast.samanyoluhaber.com/ramazan.rss'),
        (u'Yazarlar ', u'http://podcast.samanyoluhaber.com/yazarlar.rss'),
    ]

    def preprocess_html(self, soup):
        """Replace each ``<a>`` whose ``.string`` is set by that string.

        Anchors whose ``.string`` is ``None`` are left as they are.  The
        modified *soup* is returned.
        """
        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                continue
            anchor.replaceWith(label)
        return soup
|
||||||
|
# def print_version(self, url):
|
||||||
|
# return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
|
||||||
|
|
55
recipes/samanyolu_teknoloji.recipe
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class SHaberTekno(BasicNewsRecipe):
    # Feed-based recipe for the Samanyolu Teknoloji site (language 'tr').

    # Identification
    title = u'Samanyolu Teknoloji'
    __author__ = u'thomass'
    description = 'Samanyolu Teknoloji Haber Sitesinden haberler '
    publisher = 'thomass'
    category = 'bilim, teknoloji, haber, türkçe'
    language = 'tr'
    publication_type = 'magazine'

    # Download settings
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    remove_empty_feeds = True

    # Conversion metadata re-uses the values declared above.
    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # Adjacent literals are joined by the parser into one stylesheet string.
    extra_css = (
        ' .IcerikMetin{ font-family:sans-serif;font-weight: normal;font-size: 10px }'
        ' .h1IcerikBaslik {font-weight: bold; font-size: 18px}'
    )

    # Only the headline and body containers are kept.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['IcerikBaslik', 'IcerikMetinDiv']}),
    ]

    # The same logo is declared under both names.
    cover_img_url = 'http://teknoloji.samanyoluhaber.com/resources/images/logo_s_digi.jpg'
    masthead_url = cover_img_url

    feeds = [
        (u'GENEL', u'http://podcast.samanyoluhaber.com/Teknoloji.rss'),
        (u'İNTERNET', u'http://open.dapper.net/services/shaberteknolojiinternet'),
        (u'CEP TELEFONU', u'http://open.dapper.net/services/shaberteknolojicep'),
        (u'OYUN', u'http://open.dapper.net/services/shaberteknolojioyun'),
        (u'DONANIM', u'http://open.dapper.net/services/httpopendappernetservicesshaberteknolojidonanim'),
        (u'ÜRÜN İNCELEME', u'http://open.dapper.net/services/shaberteknolojiurun'),
        (u'ALIŞVERİŞ', u'http://open.dapper.net/services/shaberteknolojialisveris'),
        (u'BİLİM & TEKNOLOJİ', u'http://open.dapper.net/services/shaberteknolojibilim'),
        (u'HABERLER', u'http://open.dapper.net/services/shaberteknolojihaber'),
    ]
|
||||||
|
|
||||||
|
# def print_version(self, url):
|
||||||
|
# return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
|
||||||
|
|
67
recipes/star_gazetesi.recipe
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Star (BasicNewsRecipe):
    # Feed-based recipe for the Star Gazetesi newspaper (language 'tr').

    title = u'Star Gazetesi'
    __author__ = u'thomass'
    # Double-quoted so the apostrophe is part of the text.  Written as
    # 'Türkiye''nin' the literal was parsed as two adjacent strings and the
    # apostrophe was silently lost.
    description = "yeni Türkiye'nin Gazetesi"
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    publisher = 'thomass'
    category = 'güncel, haber, türkçe'
    language = 'tr'
    publication_type = 'newspaper'

    # Conversion metadata re-uses the values declared above.
    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    extra_css = ' .font8{font-weight: bold; font-size:20px}.font11{font-weight: normal; font-size:small}#hdetay{ font-family:sans-serif;font-size: 9px }'

    # Only the elements styled above (div.font8, span.font11, div#hdetay)
    # are kept from each article page.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['font8']}),
        dict(name='span', attrs={'class': ['font11']}),
        dict(name='div', attrs={'id': ['hdetay']}),
    ]

    cover_img_url = 'http://www.stargazete.com/starnew/img/starlogo.png'
    masthead_url = 'http://www.stargazete.com/starnew/img/starlogo.png'
    remove_empty_feeds = True

    feeds = [
        (u'MANSET', u'http://open.dapper.net/services/starmanset'),
        (u'GÜNCEL', u'http://www.stargazete.com/guncel.xml'),
        (u'POLİTİKA', u'http://www.stargazete.com/politika.xml'),
        (u' EKONOMİ', u'http://www.stargazete.com/ekonomi.xml'),
        (u'DÜNYA', u'http://www.stargazete.com/dunya.xml'),
        (u'YAZARLAR', u'http://www.stargazete.com/gazeteyazarlar.xml'),
        (u'SPOR', u'http://www.stargazete.com/spor.xml'),
        (u'SPOR YAZARLARI', u'http://www.stargazete.com/index.php?metot=rss&islem=sporyazarlar'),
        (u'SİNEMA', u'http://www.stargazete.com/sinema.xml'),
        (u'KADIN&SAĞLIK', u'http://www.stargazete.com/kadinsaglik.xml'),
        (u' STARTEK', u'http://www.stargazete.com/startek.xml'),
        (u' AÇIK GÖRÜŞ', u'http://www.stargazete.com/acikgorus.xml'),
        (u'Star PAZAR', u'http://www.stargazete.com/pazar.xml'),
        (u'Star CUMARTESİ', u'http://www.stargazete.com/cumartesi.xml'),
    ]

    def preprocess_html(self, soup):
        """Remove links: replace each ``<a>`` by its ``.string`` when set.

        Anchors whose ``.string`` is ``None`` are left as they are.  The
        modified *soup* is returned.
        """
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
|
||||||
|
#def print_version(self, url):
|
||||||
|
#return url.replace('/', 'http://www.stargazete.com/')
|
||||||
|
|
@ -1,3 +1,4 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
@ -9,9 +10,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class Sueddeutsche(BasicNewsRecipe):
|
class Sueddeutsche(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'S\xfcddeutsche'
|
title = u'Süddeutsche'
|
||||||
description = 'News from Germany'
|
description = 'News from Germany'
|
||||||
__author__ = 'Oliver Niesner and Sujata Raman'
|
__author__ = 'Oliver Niesner and Armin Geller'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
timefmt = ' [%d %b %Y]'
|
timefmt = ' [%d %b %Y]'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
@ -53,26 +54,29 @@ class Sueddeutsche(BasicNewsRecipe):
|
|||||||
body{font-family:Arial,Helvetica,sans-serif; }
|
body{font-family:Arial,Helvetica,sans-serif; }
|
||||||
.photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} '''
|
.photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} '''
|
||||||
|
|
||||||
#feeds = [(u'Topthemen', u'http://suche.sueddeutsche.de/query/politik/-docdatetime/drilldown/%C2%A7documenttype%3AArtikel?output=rss')]
|
feeds = [
|
||||||
|
(u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'),
|
||||||
feeds = [(u'Wissen', u'http://suche.sueddeutsche.de/query/wissen/nav/%C2%A7ressort%3AWissen/sort/-docdatetime?output=rss'),
|
(u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'),
|
||||||
(u'Politik', u'http://suche.sueddeutsche.de/query/politik/nav/%C2%A7ressort%3APolitik/sort/-docdatetime?output=rss'),
|
(u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'),
|
||||||
(u'Wirtschaft', u'http://suche.sueddeutsche.de/query/wirtschaft/nav/%C2%A7ressort%3AWirtschaft/sort/-docdatetime?output=rss'),
|
(u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'),
|
||||||
(u'Finanzen', u'http://suche.sueddeutsche.de/query/finanzen/nav/%C2%A7ressort%3AGeld/sort/-docdatetime?output=rss'),
|
(u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
|
||||||
(u'Kultur', u'http://suche.sueddeutsche.de/query/kultur/nav/%C2%A7ressort%3AKultur/sort/-docdatetime?output=rss'),
|
(u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
|
||||||
(u'Sport', u'http://suche.sueddeutsche.de/query/sport/nav/%C2%A7ressort%3ASport/sort/-docdatetime?output=rss'),
|
(u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
|
||||||
(u'Bayern', u'http://suche.sueddeutsche.de/query/bayern/nav/%C2%A7ressort%3ABayern/sort/-docdatetime?output=rss'),
|
(u'München&Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'),
|
||||||
(u'Panorama', u'http://suche.sueddeutsche.de/query/panorama/sort/-docdatetime?output=rss'),
|
(u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
|
||||||
(u'Leben&Stil', u'http://suche.sueddeutsche.de/query/stil/nav/%C2%A7ressort%3A%22Leben%20%26%20Stil%22/sort/-docdatetime?output=rss'),
|
(u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
|
||||||
(u'Gesundheit', u'http://suche.sueddeutsche.de/query/gesundheit/nav/%C2%A7ressort%3AGesundheit/sort/-docdatetime?output=rss'),
|
(u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'),
|
||||||
(u'Auto&Reise', u'http://suche.sueddeutsche.de/query/automobil/nav/%C2%A7ressort%3A%22Auto%20%26%20Mobil%22/sort/-docdatetime?output=rss'),
|
(u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'),
|
||||||
(u'Computer', u'http://suche.sueddeutsche.de/query/computer/nav/%C2%A7ressort%3AComputer/sort/-docdatetime?output=rss'),
|
(u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'),
|
||||||
(u'Job&Karriere', u'http://suche.sueddeutsche.de/query/job/nav/%C2%A7ressort%3A%22Job%20%26%20Karriere%22/sort/-docdatetime?output=rss'),
|
(u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'),
|
||||||
(u'Reise', u'http://suche.sueddeutsche.de/query/reise/nav/%C2%A7ressort%3AReise/sort/-docdatetime?output=rss')
|
(u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'),
|
||||||
|
(u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only
|
||||||
|
(u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'), # sometimes only
|
||||||
|
(u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'), # sometimes only
|
||||||
|
(u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only
|
||||||
|
(u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'), # sometimes only
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
main, sep, id = url.rpartition('/')
|
main, sep, id = url.rpartition('/')
|
||||||
return main + '/2.220/' + id
|
return main + '/2.220/' + id
|
||||||
|
@ -40,11 +40,11 @@ class SVD_se(BasicNewsRecipe):
|
|||||||
,(u'Kultur' , u'http://www.svd.se/kulturnoje/nyheter/?service=rss')
|
,(u'Kultur' , u'http://www.svd.se/kulturnoje/nyheter/?service=rss')
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'articlecontent'})]
|
keep_only_tags = [dict(name='div', attrs={'id':['article-content', 'articlecontent']})]
|
||||||
remove_tags_after = dict(name='div',attrs={'class':'articlebody normal'})
|
remove_tags_after = dict(name='div',attrs={'class':'articlebody'})
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','base'])
|
dict(name=['object','link','base'])
|
||||||
,dict(name='div',attrs={'class':['articlead','factcolumn']})
|
,dict(name='div',attrs={'class':['articlead','factcolumn', 'article-ad']})
|
||||||
,dict(name='ul', attrs={'class':'toolbar articletop clearfix'})
|
,dict(name='ul', attrs={'class':'toolbar articletop clearfix'})
|
||||||
,dict(name='p', attrs={'class':'more'})
|
,dict(name='p', attrs={'class':'more'})
|
||||||
]
|
]
|
||||||
|
21
recipes/wnp.recipe
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
||||||
|
title = u'WNP'
|
||||||
|
cover_url= 'http://k.wnp.pl/images/wnpLogo.gif'
|
||||||
|
__author__ = 'fenuks'
|
||||||
|
description = u'Wirtualny Nowy Przemysł'
|
||||||
|
category = 'economy'
|
||||||
|
language = 'pl'
|
||||||
|
oldest_article = 8
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets= True
|
||||||
|
keep_only_tags = dict(name='div', attrs={'id':'contentText'})
|
||||||
|
feeds = [(u'Wiadomości gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'),
|
||||||
|
(u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'),
|
||||||
|
(u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'),
|
||||||
|
(u'Serwis Hutnictwo', u'http://www.wnp.pl/rss/serwis_rss_3.xml'),
|
||||||
|
(u'Serwis Górnictwo', u'http://www.wnp.pl/rss/serwis_rss_4.xml'),
|
||||||
|
(u'Serwis Logistyka', u'http://www.wnp.pl/rss/serwis_rss_5.xml'),
|
||||||
|
(u'Serwis IT', u'http://www.wnp.pl/rss/serwis_rss_6.xml')]
|
@ -53,6 +53,12 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def abs_wsj_url(self, href):
|
||||||
|
if not href.startswith('http'):
|
||||||
|
href = 'http://online.wsj.com' + href
|
||||||
|
return href
|
||||||
|
|
||||||
|
|
||||||
def wsj_get_index(self):
|
def wsj_get_index(self):
|
||||||
return self.index_to_soup('http://online.wsj.com/itp')
|
return self.index_to_soup('http://online.wsj.com/itp')
|
||||||
|
|
||||||
@ -83,14 +89,14 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
pageone = a['href'].endswith('pageone')
|
pageone = a['href'].endswith('pageone')
|
||||||
if pageone:
|
if pageone:
|
||||||
title = 'Front Section'
|
title = 'Front Section'
|
||||||
url = 'http://online.wsj.com' + a['href']
|
url = self.abs_wsj_url(a['href'])
|
||||||
feeds = self.wsj_add_feed(feeds,title,url)
|
feeds = self.wsj_add_feed(feeds,title,url)
|
||||||
title = 'What''s News'
|
title = 'What''s News'
|
||||||
url = url.replace('pageone','whatsnews')
|
url = url.replace('pageone','whatsnews')
|
||||||
feeds = self.wsj_add_feed(feeds,title,url)
|
feeds = self.wsj_add_feed(feeds,title,url)
|
||||||
else:
|
else:
|
||||||
title = self.tag_to_string(a)
|
title = self.tag_to_string(a)
|
||||||
url = 'http://online.wsj.com' + a['href']
|
url = self.abs_wsj_url(a['href'])
|
||||||
feeds = self.wsj_add_feed(feeds,title,url)
|
feeds = self.wsj_add_feed(feeds,title,url)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
@ -146,7 +152,7 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
title = self.tag_to_string(a).strip() + ' [%s]'%meta
|
title = self.tag_to_string(a).strip() + ' [%s]'%meta
|
||||||
else:
|
else:
|
||||||
title = self.tag_to_string(a).strip()
|
title = self.tag_to_string(a).strip()
|
||||||
url = 'http://online.wsj.com'+a['href']
|
url = self.abs_wsj_url(a['href'])
|
||||||
desc = ''
|
desc = ''
|
||||||
for p in container.findAll('p'):
|
for p in container.findAll('p'):
|
||||||
desc = self.tag_to_string(p)
|
desc = self.tag_to_string(p)
|
||||||
|
52
recipes/yagmur_dergisi.recipe
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Yagmur(BasicNewsRecipe):
|
||||||
|
title = u'Yagmur Dergisi'
|
||||||
|
__author__ = u'thomass'
|
||||||
|
description = 'Üç Aylık Dil, Kültür ve Edebiyat Dergisi'
|
||||||
|
oldest_article = 90
|
||||||
|
max_articles_per_feed =100
|
||||||
|
no_stylesheets = True
|
||||||
|
#delay = 1
|
||||||
|
#use_embedded_content = False
|
||||||
|
|
||||||
|
#publisher = ' '
|
||||||
|
category = 'dergi, ilim, kültür, edebiyat,Türkçe'
|
||||||
|
language = 'tr'
|
||||||
|
publication_type = 'magazine'
|
||||||
|
encoding = 'ISO 8859-9'
|
||||||
|
publisher = 'thomass'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||||
|
conversion_options = {
|
||||||
|
'tags' : category
|
||||||
|
,'language' : language
|
||||||
|
,'publisher' : publisher
|
||||||
|
,'linearize_tables': True
|
||||||
|
}
|
||||||
|
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||||
|
#keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})]
|
||||||
|
|
||||||
|
#remove_attributes = ['aria-describedby']
|
||||||
|
#remove_tags = [dict(name='div', attrs={'id':['renk10']}) ]
|
||||||
|
cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
|
||||||
|
masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
|
||||||
|
#remove_tags_before = dict(id='content-right')
|
||||||
|
|
||||||
|
|
||||||
|
#remove_empty_feeds= True
|
||||||
|
#remove_attributes = ['width','height']
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
( u'Yagmur', u'http://open.dapper.net/services/yagmur'),
|
||||||
|
]
|
||||||
|
|
||||||
|
#def preprocess_html(self, soup):
|
||||||
|
# return self.adeify_images(soup)
|
||||||
|
def print_version(self, url): #there is a probem caused by table format
|
||||||
|
return url.replace('http://www.yagmurdergisi.com.tr/konu_goster.php?konu_id=', 'http://www.yagmurdergisi.com.tr/yazformati.php?konu_id=')
|
||||||
|
|
52
recipes/yeni_umit_dergisi.recipe
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class YeniUmit(BasicNewsRecipe):
|
||||||
|
title = u'Yeni Umit Dergisi'
|
||||||
|
__author__ = u'thomass'
|
||||||
|
description = 'Aylık Dini İlimler ve Kültür Dergisi'
|
||||||
|
oldest_article = 45
|
||||||
|
max_articles_per_feed =100
|
||||||
|
no_stylesheets = True
|
||||||
|
#delay = 1
|
||||||
|
#use_embedded_content = False
|
||||||
|
|
||||||
|
#publisher = ' '
|
||||||
|
category = 'dergi, ilim, kültür, edebiyat,Türkçe'
|
||||||
|
language = 'tr'
|
||||||
|
publication_type = 'magazine'
|
||||||
|
encoding = 'ISO 8859-9'
|
||||||
|
publisher = 'thomass'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||||
|
conversion_options = {
|
||||||
|
'tags' : category
|
||||||
|
,'language' : language
|
||||||
|
,'publisher' : publisher
|
||||||
|
,'linearize_tables': True
|
||||||
|
}
|
||||||
|
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||||
|
#keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})]
|
||||||
|
|
||||||
|
#remove_attributes = ['aria-describedby']
|
||||||
|
#remove_tags = [dict(name='div', attrs={'id':['renk10']}) ]
|
||||||
|
cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
|
||||||
|
masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
|
||||||
|
#remove_tags_before = dict(id='content-right')
|
||||||
|
|
||||||
|
|
||||||
|
#remove_empty_feeds= True
|
||||||
|
#remove_attributes = ['width','height']
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
( u'Yeni Umit', u'http://open.dapper.net/services/yeniumit'),
|
||||||
|
]
|
||||||
|
|
||||||
|
#def preprocess_html(self, soup):
|
||||||
|
# return self.adeify_images(soup)
|
||||||
|
def print_version(self, url): #there is a probem caused by table format
|
||||||
|
return url.replace('http://www.yeniumit.com.tr/konular', 'http://www.yeniumit.com.tr/yazdir')
|
||||||
|
|
64
recipes/yenisafak_gazetesi.recipe
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Bugun (BasicNewsRecipe):
|
||||||
|
|
||||||
|
title = u'Yenişafak Gazetesi'
|
||||||
|
__author__ = u'thomass'
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed =100
|
||||||
|
no_stylesheets = True
|
||||||
|
#delay = 1
|
||||||
|
use_embedded_content = False
|
||||||
|
encoding = 'ISO 8859-9' #'UTF-8'
|
||||||
|
publisher = 'thomass'
|
||||||
|
category = 'news, haberler,TR,gazete'
|
||||||
|
language = 'tr'
|
||||||
|
publication_type = 'newspaper '
|
||||||
|
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||||
|
conversion_options = {
|
||||||
|
'tags' : category
|
||||||
|
,'language' : language
|
||||||
|
,'publisher' : publisher
|
||||||
|
,'linearize_tables': True
|
||||||
|
}
|
||||||
|
cover_img_url = 'http://yenisafak.com.tr/resim/logo.gif'
|
||||||
|
masthead_url = 'http://yenisafak.com.tr/resim/logo.gif'
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'id':[ 'ctghaberdetay2010']}) ]
|
||||||
|
extra_css = ' h1{font-size:20px;font-weight: bold}h2{font-size: small;font-weight: bold}div{font-size: small} '#h1{ font-size:10%;font-weight: bold} '#ctl00_ortayer_haberBaslik{ 'font-size:10%;font-weight: bold'}
|
||||||
|
|
||||||
|
#keep_only_tags = [dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}) ]
|
||||||
|
remove_tags = [ dict(name='div', attrs={'id':['yasaluyari2010','divhaberdetayilisik2010']}),dict(name='font', attrs={'class':['haberdetaytarih']})]#,'news-detail-gallery','news-detail-news-bottom-social']}),dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']})]
|
||||||
|
|
||||||
|
|
||||||
|
#remove_attributes = ['width','height']
|
||||||
|
remove_empty_feeds= True
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
( u'SonDakika', u'http://yenisafak.com.tr/rss/?xml=anasayfa'),
|
||||||
|
( u'Gündem', u'http://yenisafak.com.tr/rss/?xml=gundem'),
|
||||||
|
( u'Politika', u'http://yenisafak.com.tr/rss/?xml=politika'),
|
||||||
|
( u'Ekonomi', u'http://yenisafak.com.tr/rss/?xml=ekonomi'),
|
||||||
|
( u'Dünya', u'http://yenisafak.com.tr/rss/?xml=dunya'),
|
||||||
|
( u'Aktüel', u'http://yenisafak.com.tr/rss/?xml=aktuel'),
|
||||||
|
( u'Eğitim', u'http://yenisafak.com.tr/rss/?xml=egitim'),
|
||||||
|
( u'Spor', u'http://yenisafak.com.tr/rss/?xml=spor'),
|
||||||
|
( u'Yazarlar', u'http://yenisafak.com.tr/rss/?xml=yazarlar'),
|
||||||
|
( u'Televizyon', u'http://yenisafak.com.tr/rss/?xml=televizyon'),
|
||||||
|
( u'Sağlık', u'http://yenisafak.com.tr/rss/?xml=saglik'),
|
||||||
|
( u'Yurt Haberler', u'http://yenisafak.com.tr/rss/?xml=yurthaberler'),
|
||||||
|
( u'Bilişim', u'http://yenisafak.com.tr/rss/?xml=bilisim'),
|
||||||
|
( u'Diziler', u'http://yenisafak.com.tr/rss/?xml=diziler'),
|
||||||
|
( u'Kültür-Sanat', u'http://yenisafak.com.tr/rss/?xml=kultursanat'),
|
||||||
|
( u'Röportaj', u'http://yenisafak.com.tr/rss/?xml=roportaj'),
|
||||||
|
( u'Sinema', u'http://yenisafak.com.tr/rss/?xml=sinema'),
|
||||||
|
( u'Yorum', u'http://yenisafak.com.tr/rss/?xml=yorum'),
|
||||||
|
( u' Yeni Şafak Pazar', u'http://yenisafak.com.tr/rss/?xml=pazar'),
|
||||||
|
( u'Yeni Şafak Kitap', u'http://yenisafak.com.tr/rss/?xml=kitap'),
|
||||||
|
( u'Yeni Şafak English', u'http://yenisafak.com.tr/rss/?xml=english'),
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
]
|
@ -62,10 +62,16 @@ authors_completer_append_separator = False
|
|||||||
# The author name suffixes are words that are ignored when they occur at the
|
# The author name suffixes are words that are ignored when they occur at the
|
||||||
# end of an author name. The case of the suffix is ignored and trailing
|
# end of an author name. The case of the suffix is ignored and trailing
|
||||||
# periods are automatically handled.
|
# periods are automatically handled.
|
||||||
|
# The author name copy words are a set of words which if they occur in an
|
||||||
|
# author name cause the automatically generated author sort string to be
|
||||||
|
# identical to the author name. This means that the sort for a string like Acme
|
||||||
|
# Inc. will be Acme Inc. instead of Inc., Acme
|
||||||
author_sort_copy_method = 'comma'
|
author_sort_copy_method = 'comma'
|
||||||
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
|
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
|
||||||
'MD', 'M.D', 'I', 'II', 'III', 'IV',
|
'MD', 'M.D', 'I', 'II', 'III', 'IV',
|
||||||
'Junior', 'Senior')
|
'Junior', 'Senior')
|
||||||
|
author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council',
|
||||||
|
'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team')
|
||||||
|
|
||||||
#: Use author sort in Tag Browser
|
#: Use author sort in Tag Browser
|
||||||
# Set which author field to display in the tags pane (the list of authors,
|
# Set which author field to display in the tags pane (the list of authors,
|
||||||
@ -181,7 +187,7 @@ save_template_title_series_sorting = 'library_order'
|
|||||||
# To disable use the expression: '^$'
|
# To disable use the expression: '^$'
|
||||||
# This expression is designed for articles that are followed by spaces. If you
|
# This expression is designed for articles that are followed by spaces. If you
|
||||||
# also need to match articles that are followed by other characters, for example L'
|
# also need to match articles that are followed by other characters, for example L'
|
||||||
# in French, use: r"^(A\s+|The\s+|An\s+|L')" instead.
|
# in French, use: "^(A\s+|The\s+|An\s+|L')" instead.
|
||||||
# Default: '^(A|The|An)\s+'
|
# Default: '^(A|The|An)\s+'
|
||||||
title_sort_articles=r'^(A|The|An)\s+'
|
title_sort_articles=r'^(A|The|An)\s+'
|
||||||
|
|
||||||
|
@ -17,8 +17,8 @@ class GUI(Command):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def find_forms(cls):
|
def find_forms(cls):
|
||||||
from calibre.gui2 import find_forms
|
# We do not use the calibre function find_forms as
|
||||||
return find_forms(cls.SRC)
|
# importing calibre.gui2 may not work
|
||||||
forms = []
|
forms = []
|
||||||
for root, _, files in os.walk(cls.PATH):
|
for root, _, files in os.walk(cls.PATH):
|
||||||
for name in files:
|
for name in files:
|
||||||
@ -29,8 +29,9 @@ class GUI(Command):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def form_to_compiled_form(cls, form):
|
def form_to_compiled_form(cls, form):
|
||||||
from calibre.gui2 import form_to_compiled_form
|
# We do not use the calibre function form_to_compiled_form as
|
||||||
return form_to_compiled_form(form)
|
# importing calibre.gui2 may not work
|
||||||
|
return form.rpartition('.')[0]+'_ui.py'
|
||||||
|
|
||||||
def run(self, opts):
|
def run(self, opts):
|
||||||
self.build_forms()
|
self.build_forms()
|
||||||
|
@ -55,7 +55,7 @@ class Develop(Command):
|
|||||||
short_description = 'Setup a development environment for calibre'
|
short_description = 'Setup a development environment for calibre'
|
||||||
MODE = 0755
|
MODE = 0755
|
||||||
|
|
||||||
sub_commands = ['build', 'resources', 'gui']
|
sub_commands = ['build', 'resources', 'iso639', 'gui',]
|
||||||
|
|
||||||
def add_postinstall_options(self, parser):
|
def add_postinstall_options(self, parser):
|
||||||
parser.add_option('--make-errors-fatal', action='store_true', default=False,
|
parser.add_option('--make-errors-fatal', action='store_true', default=False,
|
||||||
|
@ -290,7 +290,7 @@ class LinuxFreeze(Command):
|
|||||||
|
|
||||||
launcher = textwrap.dedent('''\
|
launcher = textwrap.dedent('''\
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
path=`readlink -e $0`
|
path=`readlink -f $0`
|
||||||
base=`dirname $path`
|
base=`dirname $path`
|
||||||
lib=$base/lib
|
lib=$base/lib
|
||||||
export LD_LIBRARY_PATH=$lib:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=$lib:$LD_LIBRARY_PATH
|
||||||
|
2169
setup/iso639.xml
3
setup/iso_639/README
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
These files are taken from the iso-codes package, licensed under the LGPL 2.1
|
||||||
|
|
||||||
|
All the files are from the iso_639_3 sub-directory.
|