Sync to trunk.
253
Changelog.yaml
@ -19,6 +19,259 @@
|
|||||||
# new recipes:
|
# new recipes:
|
||||||
# - title:
|
# - title:
|
||||||
|
|
||||||
|
- version: 0.8.48
|
||||||
|
date: 2012-04-20
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Conversion: The search and replace feature has been completely revamped."
|
||||||
|
description: "You can now use any number of search and replace
|
||||||
|
expression, not just three. You can also store and load frequently used
|
||||||
|
sets of search and replace expressions. Also, the wizard generates its
|
||||||
|
preview in a separate process to protect against crashes/memory leaks."
|
||||||
|
tickets: [983476,983484,983478]
|
||||||
|
|
||||||
|
- title: "Support for the new '.azw3' files that Amazon recently started generating. calibre will now detect them as ebooks. It can also view/convert them, if they are DRM free."
|
||||||
|
|
||||||
|
- title: "Drivers for Samsung Galaxy ACE GT-S5830L and HTC One X"
|
||||||
|
tickets: [981185]
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Get Books: Support the new website design of Barnes & Noble"
|
||||||
|
|
||||||
|
- title: "T1 driver: Fix books sent to SD card sometimes resulting problems when deleted."
|
||||||
|
tickets: [943586]
|
||||||
|
|
||||||
|
- title: "Do not allow author names to be set to blank via the Manage authors function. Blank authors are now automatically set to 'Unknown'"
|
||||||
|
|
||||||
|
- title: "MOBI Output: Handle background color specified on <td> and <tr> in addition to <table> tags."
|
||||||
|
tickets: [980813]
|
||||||
|
|
||||||
|
- title: "MOBI Output: Fix underline style applied to parent element not getting inherited by <a> children."
|
||||||
|
tickets: [985711]
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- xkcd
|
||||||
|
- Metro Nieuws
|
||||||
|
- Calgary Herald
|
||||||
|
- Orlando Sentinel
|
||||||
|
- countryfile
|
||||||
|
- Heise
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: Various new Polish news sources
|
||||||
|
author: fenuks
|
||||||
|
|
||||||
|
- title: Various Italian news sources
|
||||||
|
author: faber1971
|
||||||
|
|
||||||
|
- title: Jakarta Globe
|
||||||
|
author: rty
|
||||||
|
|
||||||
|
- title: Acim Bilim Dergisi
|
||||||
|
author: thomass
|
||||||
|
|
||||||
|
- version: 0.8.47
|
||||||
|
date: 2012-04-13
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Conversion pipeline: Add support for all the named entities in the HTML 5 spec."
|
||||||
|
tickets: [976056]
|
||||||
|
|
||||||
|
- title: "Support for viewing and converting the Haodoo PDB ebook format"
|
||||||
|
tickets: [976478]
|
||||||
|
|
||||||
|
- title: "Device driver for Laser EB720"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Fix regression in automatic adding in 0.8.46 that broke automatic adding if adding of duplicates is enabled and auto convert is also enabled"
|
||||||
|
tickets: [976336]
|
||||||
|
|
||||||
|
- title: 'Fix "Tags" field in advanced search does not obey regex setting'
|
||||||
|
tickets: [980221]
|
||||||
|
|
||||||
|
- title: "EPUB Input: Automatically extract cover image from simple HTML title page that consists of only a single <img> tag, instead of rendering the page"
|
||||||
|
|
||||||
|
- title: "Prevent errors when both author and author_sort are used in a template for reading metadata from filenames for files on a device"
|
||||||
|
|
||||||
|
- title: "Amazon metadata download: Handle books whose titles start with a bracket."
|
||||||
|
tickets: [976365]
|
||||||
|
|
||||||
|
- title: "Get Books: Fix downloading of purchased books from Baen"
|
||||||
|
tickets: [975929]
|
||||||
|
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Forbes
|
||||||
|
- Caros Amigos
|
||||||
|
- Trouw
|
||||||
|
- Sun UK
|
||||||
|
- Metro
|
||||||
|
- Daily Mirror
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: "Melbourne Herald Sun"
|
||||||
|
author: Ray Hartley
|
||||||
|
|
||||||
|
- title: "Editoriali and Zerocalcare"
|
||||||
|
author: faber1971
|
||||||
|
|
||||||
|
- version: 0.8.46
|
||||||
|
date: 2012-04-06
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Auto adding: When automatically adding files from a folder, automatically convert the files to the current output format after adding. This can be turned off via Preferences->Adding Books->Automatic Adding."
|
||||||
|
tickets: [969053]
|
||||||
|
|
||||||
|
- title: "E-book viewer: When reading a MOBI file that is actually a KF8 book, show the format as being KF8"
|
||||||
|
|
||||||
|
- title: "Content server: Workaround for android stock browser not support HTTP AUTH."
|
||||||
|
|
||||||
|
- title: "Edit metadata dialog: Change the remove unused series button to a clear series button (as the remove unused series function is now automatic)"
|
||||||
|
|
||||||
|
- title: "Driver for PocketBook 622."
|
||||||
|
tickets: [969875]
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Run metadata downloads in a separate process to workaround memory leaks in third party plugins. Also removes the need to break up bulk metadata downloads into 100 book batches."
|
||||||
|
|
||||||
|
- title: "Make tag browser filtering work when capital letters are entered."
|
||||||
|
|
||||||
|
- title: "EPUB metadata: Ignore urn:isbn: prefix from ISBN declaration when reading metadata"
|
||||||
|
|
||||||
|
- title: "Get books: Fix feedbooks store not showing all available formats"
|
||||||
|
|
||||||
|
- title: "KF8 Input: When the KF8 book has no metadata ToC, try to extract the ToC from the HTML instead."
|
||||||
|
tickets: [969238]
|
||||||
|
|
||||||
|
- title: "Fix regression that broke access to Preferences via the Preferences item in the calibre menu on OS X"
|
||||||
|
tickets: [969418]
|
||||||
|
|
||||||
|
- title: "Fix bug that ignored metadata specified on the command line when using calibredb add"
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- OReilly Premium
|
||||||
|
- Real Clear
|
||||||
|
- Soldier's Magazine
|
||||||
|
- Rue89
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: The Southern Star
|
||||||
|
author: watou
|
||||||
|
|
||||||
|
- title: Buenos Aires Herald
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
- version: 0.8.45
|
||||||
|
date: 2012-03-30
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "E-book viewer: Allow the up and down keys to scroll past section boundaries"
|
||||||
|
|
||||||
|
- title: "calibredb: Allow specification of basic metadata on the command line when adding books."
|
||||||
|
tickets: [951063]
|
||||||
|
|
||||||
|
- title: "Driver for Samsung Galaxy Plus GT-I9001"
|
||||||
|
|
||||||
|
- title: "KF8 Input: Support KF8 format Amazon book samples."
|
||||||
|
tickets: [963418]
|
||||||
|
|
||||||
|
- title: "When a new plugin is added to calibre for the first time, have its icon (if any) show up even when a device is connected (this can be changed by the user at the time of plugin installation)"
|
||||||
|
|
||||||
|
- title: "Add keyboard shortcuts for Bold, Italic and Underline to the comments editor in the edit metadata dialog"
|
||||||
|
tickets: [963559]
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "E-book viewer: Fix last read position (and bookmarks in general) being inaccurate for some books."
|
||||||
|
description: "The technique for marking locations in books used by the viewer has changed. The new technique should be much more accurate than the last one, especially when the font size at which the book is being viewed is changed. Note that this change means that bookmarks created with this release of calibre will not be read by previous calibre versions. On a technical note, the viewer now uses the CFI specification from the EPUB 3 standard for bookmarks."
|
||||||
|
type: major
|
||||||
|
|
||||||
|
- title: "Workarounds for a few regressions in the user interface in 0.8.44 caused by the update to Qt 4.8.0"
|
||||||
|
|
||||||
|
- title: "Books list: Preserve the horizontal scroll position when sorting by a column"
|
||||||
|
|
||||||
|
- title: "Fix saving to disk and then adding the book back not restoring tags-like custom columns"
|
||||||
|
|
||||||
|
- title: "Linux installer: Fix completion for ebook-convert not working."
|
||||||
|
tickets: [967834]
|
||||||
|
|
||||||
|
- title: "MOBI Output: Recognize type=text in addition to type=start guide elements"
|
||||||
|
|
||||||
|
- title: "Get Books: Updates to Nexto, Ebookpoint and Woblink stores"
|
||||||
|
|
||||||
|
- title: "Fix unable to clear username/password in Fetch news dialog"
|
||||||
|
|
||||||
|
- title: "PDF Output: Fix margin specifications not being applied"
|
||||||
|
|
||||||
|
- title: "Linux installer: Manually preserve the defaults.list mimetype association file to workaround buggy xdg-desktop-menu implementations in some distros."
|
||||||
|
tickets: [926559]
|
||||||
|
|
||||||
|
- title: "E-book viewer: Fix regression that caused the ebook viewer to stop functioning if it is launched from the main calibre program and then the main calibre program is closed."
|
||||||
|
tickets: [963960]
|
||||||
|
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Our Daily Bread
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: NRC Handelsblad (free)
|
||||||
|
author: veezh
|
||||||
|
|
||||||
|
- version: 0.8.44
|
||||||
|
date: 2012-03-23
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "E-book viewer: A whole new full screen mode."
|
||||||
|
description: "The new mode has no toolbars to distract from the text and the ability to set the width of the column of text via Preferences in the ebook viewer. Click the Fullscreen button on the toolbar in the viewer to enter fullscreen mode (or press the F11 or Ctrl+Shit+F keys)"
|
||||||
|
type: major
|
||||||
|
tickets: [959830]
|
||||||
|
|
||||||
|
- title: "Copy to Library: If books were auto merged by the copy to library process, popup a message telling the user about it, as otherwise some people forget they have turned on auto merge and accuse calibre of losing their books."
|
||||||
|
|
||||||
|
- title: "Unix driver for Ectaco JetBook color"
|
||||||
|
tickets: [958442]
|
||||||
|
|
||||||
|
- title: "Add a link to the 'Adding Books Preferences' in the drop down menu of the Add Books button for easier access and more prominence"
|
||||||
|
tickets: [958145]
|
||||||
|
|
||||||
|
- title: "Smarten punctuation: Add a few more cases for detecting opening and closing quotes"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Get Books: Updates to various store plugins to deal with website changes: Amazon Europe, Waterstones, Foyles, B&N, Kobo, Woblink and Empik"
|
||||||
|
|
||||||
|
- title: "Catalog generation: Do not error out when generating csv/xml catalogs if the catalog title contains filename invalid characters."
|
||||||
|
tickets: [960154]
|
||||||
|
|
||||||
|
- title: "RTF Output: Ignore corrupted images in the input document, instead of erroring out."
|
||||||
|
tickets: [959600]
|
||||||
|
|
||||||
|
- title: "E-book viewer: Try to preserve page position when the window is resized"
|
||||||
|
|
||||||
|
- title: "Fix bug that caused wrong series to be shown when clicking on the first letter of a series group in the Tag Browser"
|
||||||
|
|
||||||
|
- title: "Fix calibre not supporting different http and https proxies."
|
||||||
|
tickets: [960173]
|
||||||
|
|
||||||
|
- title: "MOBI Input: Fix regression caused by KF8 support that broke reading of ancient non-Amazon PRC files"
|
||||||
|
|
||||||
|
- title: "Fix EPUB to EPUB conversion of an EPUB with obfuscated fonts resulting in the fonts not being readable in Adobe Digital Editions"
|
||||||
|
tickets: [957527]
|
||||||
|
|
||||||
|
- title: "RTF Output: Fix bug that broke conversion to RTF when the input document contains <img> tags with no src attribute."
|
||||||
|
|
||||||
|
- title: "Fix regression in 0.8.43 that broke use of general mode templates that ended in a semi-colon."
|
||||||
|
tickets: [957295]
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- b92
|
||||||
|
- Various Polish news sources
|
||||||
|
- Le Monde
|
||||||
|
- FHM UK
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: Ivana Milakovic and Klub knjige
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
|
||||||
- version: 0.8.43
|
- version: 0.8.43
|
||||||
date: 2012-03-16
|
date: 2012-03-16
|
||||||
|
|
||||||
|
27
recipes/acim_bilim_dergisi.recipe
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1334868409(BasicNewsRecipe):
|
||||||
|
title = u'AÇIK BİLİM DERGİSİ'
|
||||||
|
description = ' Aylık çevrimiçi bilim dergisi'
|
||||||
|
__author__ = u'thomass'
|
||||||
|
oldest_article = 30
|
||||||
|
max_articles_per_feed = 300
|
||||||
|
auto_cleanup = True
|
||||||
|
encoding = 'UTF-8'
|
||||||
|
publisher = 'açık bilim'
|
||||||
|
category = 'haber, bilim,TR,dergi'
|
||||||
|
language = 'tr'
|
||||||
|
publication_type = 'magazine '
|
||||||
|
conversion_options = {
|
||||||
|
'tags' : category
|
||||||
|
,'language' : language
|
||||||
|
,'publisher' : publisher
|
||||||
|
,'linearize_tables': True
|
||||||
|
}
|
||||||
|
cover_img_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
|
||||||
|
masthead_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [(u'Tüm Yayınlar', u'http://www.acikbilim.com/feed')]
|
@ -9,6 +9,7 @@ class Adventure_zone(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
oldest_article = 20
|
oldest_article = 20
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
index='http://www.adventure-zone.info/fusion/'
|
||||||
use_embedded_content=False
|
use_embedded_content=False
|
||||||
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
|
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
|
||||||
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
|
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
|
||||||
@ -45,6 +46,19 @@ class Adventure_zone(BasicNewsRecipe):
|
|||||||
skip_tag = skip_tag.findAll(name='a')
|
skip_tag = skip_tag.findAll(name='a')
|
||||||
for r in skip_tag:
|
for r in skip_tag:
|
||||||
if r.strong:
|
if r.strong:
|
||||||
word=r.strong.string
|
word=r.strong.string.lower()
|
||||||
if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)):
|
if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
|
||||||
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
|
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
footer=soup.find(attrs={'class':'news-footer middle-border'})
|
||||||
|
if footer and len(footer('a'))>=2:
|
||||||
|
footer('a')[1].extract()
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.index + a['href']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
|
@ -6,6 +6,7 @@ class Android_com_pl(BasicNewsRecipe):
|
|||||||
description = 'Android.com.pl - biggest polish Android site'
|
description = 'Android.com.pl - biggest polish Android site'
|
||||||
category = 'Android, mobile'
|
category = 'Android, mobile'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
use_embedded_content=True
|
||||||
cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'
|
cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
b92.net
|
b92.net
|
||||||
'''
|
'''
|
||||||
@ -20,13 +20,13 @@ class B92(BasicNewsRecipe):
|
|||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
masthead_url = 'http://www.b92.net/images/fp/logo.gif'
|
masthead_url = 'http://b92s.net/v4/img/new-logo.png'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
|
|
||||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||||
body{font-family: Arial,Helvetica,sans1,sans-serif}
|
body{font-family: Arial,Helvetica,sans1,sans-serif}
|
||||||
.articledescription{font-family: serif1, serif}
|
|
||||||
.article-info2,.article-info1{text-transform: uppercase; font-size: small}
|
.article-info2,.article-info1{text-transform: uppercase; font-size: small}
|
||||||
|
img{display: block}
|
||||||
|
.sms{font-weight: bold}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
@ -37,11 +37,17 @@ class B92(BasicNewsRecipe):
|
|||||||
, 'linearize_tables' : True
|
, 'linearize_tables' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [
|
||||||
|
(re.compile(u'\u0110'), lambda match: u'\u00D0'),
|
||||||
|
(re.compile(r'<html.*?<body>', re.DOTALL|re.IGNORECASE), lambda match: '<html><head><title>something</title></head><body>')
|
||||||
|
]
|
||||||
|
|
||||||
keep_only_tags = [dict(attrs={'class':['article-info1','article-text']})]
|
keep_only_tags = [dict(attrs={'class':['article-info1','article-text']})]
|
||||||
remove_attributes = ['width','height','align','hspace','vspace','border']
|
remove_attributes = ['width','height','align','hspace','vspace','border','lang','xmlns:fb']
|
||||||
remove_tags = [dict(name=['embed','link','base','meta'])]
|
remove_tags = [
|
||||||
|
dict(name=['embed','link','base','meta','iframe'])
|
||||||
|
,dict(attrs={'id':'social'})
|
||||||
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Vesti' , u'http://www.b92.net/info/rss/vesti.xml' )
|
(u'Vesti' , u'http://www.b92.net/info/rss/vesti.xml' )
|
||||||
|
80
recipes/ba_herald.recipe
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.buenosairesherald.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre import strftime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class BuenosAiresHerald(BasicNewsRecipe):
|
||||||
|
title = 'Buenos Aires Herald'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = 'A world of information in a few words'
|
||||||
|
publisher = 'Editorial Nefir S.A.'
|
||||||
|
category = 'news, politics, Argentina'
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 200
|
||||||
|
no_stylesheets = True
|
||||||
|
encoding = 'utf8'
|
||||||
|
use_embedded_content = False
|
||||||
|
language = 'en_AR'
|
||||||
|
remove_empty_feeds = True
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg'
|
||||||
|
INDEX = 'http://www.buenosairesherald.com'
|
||||||
|
extra_css = """
|
||||||
|
body{font-family: Arial,Helvetica,sans-serif }
|
||||||
|
img{margin-bottom: 0.4em; display:block}
|
||||||
|
h1{font-family: Georgia,serif}
|
||||||
|
#fecha{text-align: right; font-size: small}
|
||||||
|
"""
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : category
|
||||||
|
, 'publisher' : publisher
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
|
remove_tags = [dict(name=['meta','link','iframe'])]
|
||||||
|
keep_only_tags = [dict(attrs={'class':'nota_texto p'})]
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'Argentina' , u'http://www.buenosairesherald.com/argentina' )
|
||||||
|
,(u'World' , u'http://www.buenosairesherald.com/world' )
|
||||||
|
,(u'Latin America' , u'http://www.buenosairesherald.com/latin-america' )
|
||||||
|
,(u'Entertainment' , u'http://www.buenosairesherald.com/entertainment' )
|
||||||
|
,(u'Sports' , u'http://www.buenosairesherald.com/sports' )
|
||||||
|
]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
artidraw = url.rpartition('/article/')[2]
|
||||||
|
artid = artidraw.partition('/')[0]
|
||||||
|
return 'http://www.buenosairesherald.com/articles/print.aspx?ix=' + artid
|
||||||
|
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
totalfeeds = []
|
||||||
|
lfeeds = self.get_feeds()
|
||||||
|
for feedobj in lfeeds:
|
||||||
|
feedtitle, feedurl = feedobj
|
||||||
|
self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
||||||
|
articles = []
|
||||||
|
soup = self.index_to_soup(feedurl)
|
||||||
|
for item in soup.findAll('div', attrs={'class':'nota_texto_seccion'}):
|
||||||
|
description = self.tag_to_string(item.h2)
|
||||||
|
atag = item.h2.find('a')
|
||||||
|
if atag and atag.has_key('href'):
|
||||||
|
url = self.INDEX + atag['href']
|
||||||
|
title = description
|
||||||
|
date = strftime(self.timefmt)
|
||||||
|
articles.append({
|
||||||
|
'title' :title
|
||||||
|
,'date' :date
|
||||||
|
,'url' :url
|
||||||
|
,'description':description
|
||||||
|
})
|
||||||
|
totalfeeds.append((feedtitle, articles))
|
||||||
|
return totalfeeds
|
@ -68,4 +68,7 @@ class Benchmark_pl(BasicNewsRecipe):
|
|||||||
self.image_article(soup, soup.body)
|
self.image_article(soup, soup.body)
|
||||||
else:
|
else:
|
||||||
self.append_page(soup, soup.body)
|
self.append_page(soup, soup.body)
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.INDEX + a['href']
|
||||||
return soup
|
return soup
|
||||||
|
@ -1,220 +1,35 @@
|
|||||||
#!/usr/bin/env python
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
class CalgaryHerald(BasicNewsRecipe):
|
||||||
__license__ = 'GPL v3'
|
title = u'Calgary Herald'
|
||||||
|
oldest_article = 3
|
||||||
'''
|
max_articles_per_feed = 100
|
||||||
www.canada.com
|
|
||||||
'''
|
feeds = [
|
||||||
|
(u'News', u'http://rss.canada.com/get/?F233'),
|
||||||
import re
|
(u'Calgary', u'http://www.calgaryherald.com/scripts/sp6query.aspx?catalog=cahr&tags=keyword|calgary&output=rss?link=http%3a%2f%2fwww.calgaryherald'),
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
(u'Alberta', u'http://www.calgaryherald.com/scripts/Sp6Query.aspx?catalog=CAHR&tags=Keyword|Alberta&output=rss?link=http%3A%2F%2Fwww.calgaryherald.com%2Fnews%2Falberta%2Findex.html'),
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
(u'Politics', u'http://rss.canada.com/get/?F7551'),
|
||||||
|
(u'National', u'http://rss.canada.com/get/?F7552'),
|
||||||
|
(u'World', u'http://rss.canada.com/get/?F7553'),
|
||||||
class CanWestPaper(BasicNewsRecipe):
|
]
|
||||||
|
__author__ = 'rty'
|
||||||
# un-comment the following four lines for the Victoria Times Colonist
|
pubisher = 'Calgary Herald'
|
||||||
## title = u'Victoria Times Colonist'
|
description = 'Calgary, Alberta, Canada'
|
||||||
## url_prefix = 'http://www.timescolonist.com'
|
category = 'News, Calgary, Alberta, Canada'
|
||||||
## description = u'News from Victoria, BC'
|
|
||||||
## fp_tag = 'CAN_TC'
|
|
||||||
|
remove_javascript = True
|
||||||
# un-comment the following four lines for the Vancouver Province
|
use_embedded_content = False
|
||||||
## title = u'Vancouver Province'
|
no_stylesheets = True
|
||||||
## url_prefix = 'http://www.theprovince.com'
|
language = 'en_CA'
|
||||||
## description = u'News from Vancouver, BC'
|
encoding = 'utf-8'
|
||||||
## fp_tag = 'CAN_VP'
|
conversion_options = {'linearize_tables':True}
|
||||||
|
##masthead_url = 'http://www.calgaryherald.com/index.html'
|
||||||
# un-comment the following four lines for the Vancouver Sun
|
keep_only_tags = [
|
||||||
## title = u'Vancouver Sun'
|
dict(name='div', attrs={'id':'storyheader'}),
|
||||||
## url_prefix = 'http://www.vancouversun.com'
|
dict(name='div', attrs={'id':'storycontent'})
|
||||||
## description = u'News from Vancouver, BC'
|
|
||||||
## fp_tag = 'CAN_VS'
|
]
|
||||||
|
remove_tags_after = {'class':"story_tool_hr"}
|
||||||
# un-comment the following four lines for the Edmonton Journal
|
|
||||||
## title = u'Edmonton Journal'
|
|
||||||
## url_prefix = 'http://www.edmontonjournal.com'
|
|
||||||
## description = u'News from Edmonton, AB'
|
|
||||||
## fp_tag = 'CAN_EJ'
|
|
||||||
|
|
||||||
# un-comment the following four lines for the Calgary Herald
|
|
||||||
title = u'Calgary Herald'
|
|
||||||
url_prefix = 'http://www.calgaryherald.com'
|
|
||||||
description = u'News from Calgary, AB'
|
|
||||||
fp_tag = 'CAN_CH'
|
|
||||||
|
|
||||||
# un-comment the following four lines for the Regina Leader-Post
|
|
||||||
## title = u'Regina Leader-Post'
|
|
||||||
## url_prefix = 'http://www.leaderpost.com'
|
|
||||||
## description = u'News from Regina, SK'
|
|
||||||
## fp_tag = ''
|
|
||||||
|
|
||||||
# un-comment the following four lines for the Saskatoon Star-Phoenix
|
|
||||||
## title = u'Saskatoon Star-Phoenix'
|
|
||||||
## url_prefix = 'http://www.thestarphoenix.com'
|
|
||||||
## description = u'News from Saskatoon, SK'
|
|
||||||
## fp_tag = ''
|
|
||||||
|
|
||||||
# un-comment the following four lines for the Windsor Star
|
|
||||||
## title = u'Windsor Star'
|
|
||||||
## url_prefix = 'http://www.windsorstar.com'
|
|
||||||
## description = u'News from Windsor, ON'
|
|
||||||
## fp_tag = 'CAN_'
|
|
||||||
|
|
||||||
# un-comment the following four lines for the Ottawa Citizen
|
|
||||||
## title = u'Ottawa Citizen'
|
|
||||||
## url_prefix = 'http://www.ottawacitizen.com'
|
|
||||||
## description = u'News from Ottawa, ON'
|
|
||||||
## fp_tag = 'CAN_OC'
|
|
||||||
|
|
||||||
# un-comment the following four lines for the Montreal Gazette
|
|
||||||
## title = u'Montreal Gazette'
|
|
||||||
## url_prefix = 'http://www.montrealgazette.com'
|
|
||||||
## description = u'News from Montreal, QC'
|
|
||||||
## fp_tag = 'CAN_MG'
|
|
||||||
|
|
||||||
|
|
||||||
language = 'en_CA'
|
|
||||||
__author__ = 'Nick Redding'
|
|
||||||
no_stylesheets = True
|
|
||||||
timefmt = ' [%b %d]'
|
|
||||||
extra_css = '''
|
|
||||||
.timestamp { font-size:xx-small; display: block; }
|
|
||||||
#storyheader { font-size: medium; }
|
|
||||||
#storyheader h1 { font-size: x-large; }
|
|
||||||
#storyheader h2 { font-size: large; font-style: italic; }
|
|
||||||
.byline { font-size:xx-small; }
|
|
||||||
#photocaption { font-size: small; font-style: italic }
|
|
||||||
#photocredit { font-size: xx-small; }'''
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
|
|
||||||
remove_tags = [{'class':'comments'},
|
|
||||||
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
|
|
||||||
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
|
|
||||||
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
|
|
||||||
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
|
|
||||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
|
||||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
from datetime import timedelta, date
|
|
||||||
if self.fp_tag=='':
|
|
||||||
return None
|
|
||||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
|
||||||
br = BasicNewsRecipe.get_browser()
|
|
||||||
daysback=1
|
|
||||||
try:
|
|
||||||
br.open(cover)
|
|
||||||
except:
|
|
||||||
while daysback<7:
|
|
||||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
|
|
||||||
br = BasicNewsRecipe.get_browser()
|
|
||||||
try:
|
|
||||||
br.open(cover)
|
|
||||||
except:
|
|
||||||
daysback = daysback+1
|
|
||||||
continue
|
|
||||||
break
|
|
||||||
if daysback==7:
|
|
||||||
self.log("\nCover unavailable")
|
|
||||||
cover = None
|
|
||||||
return cover
|
|
||||||
|
|
||||||
def fixChars(self,string):
|
|
||||||
# Replace lsquo (\x91)
|
|
||||||
fixed = re.sub("\x91","‘",string)
|
|
||||||
# Replace rsquo (\x92)
|
|
||||||
fixed = re.sub("\x92","’",fixed)
|
|
||||||
# Replace ldquo (\x93)
|
|
||||||
fixed = re.sub("\x93","“",fixed)
|
|
||||||
# Replace rdquo (\x94)
|
|
||||||
fixed = re.sub("\x94","”",fixed)
|
|
||||||
# Replace ndash (\x96)
|
|
||||||
fixed = re.sub("\x96","–",fixed)
|
|
||||||
# Replace mdash (\x97)
|
|
||||||
fixed = re.sub("\x97","—",fixed)
|
|
||||||
fixed = re.sub("’","’",fixed)
|
|
||||||
return fixed
|
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
|
||||||
# Kindle TOC descriptions won't render certain characters
|
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&","&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
|
||||||
if first:
|
|
||||||
picdiv = soup.find('body').find('img')
|
|
||||||
if picdiv is not None:
|
|
||||||
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
|
|
||||||
xtitle = article.text_summary.strip()
|
|
||||||
if len(xtitle) == 0:
|
|
||||||
desc = soup.find('meta',attrs={'property':'og:description'})
|
|
||||||
if desc is not None:
|
|
||||||
article.summary = article.text_summary = desc['content']
|
|
||||||
|
|
||||||
def strip_anchors(self,soup):
|
|
||||||
paras = soup.findAll(True)
|
|
||||||
for para in paras:
|
|
||||||
aTags = para.findAll('a')
|
|
||||||
for a in aTags:
|
|
||||||
if a.img is None:
|
|
||||||
a.replaceWith(a.renderContents().decode('cp1252','replace'))
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
return self.strip_anchors(soup)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_index(self):
    """Build the section/article index from the paper's 'todays-paper' page.

    Returns the structure BasicNewsRecipe expects: a list of
    ``(section_title, [article_dict, ...])`` tuples, in the order the
    sections appear on the page.  Sections that end up with no articles
    are dropped.
    """
    soup = self.index_to_soup(self.url_prefix + '/news/todays-paper/index.html')

    articles = {}
    # Articles seen before the first section header are filed under 'News'.
    key = 'News'
    ans = ['News']

    # Each div is either a section header ("section_title...") or an
    # article teaser ("featurecontent").
    for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
        if divtag['class'].startswith('section_title'):
            # div contains a section title
            if not divtag.h3:
                continue
            key = self.tag_to_string(divtag.h3, False)
            ans.append(key)
            self.log("Section name %s" % key)
            continue

        # div contains article data; skip teasers without a linked headline.
        h1tag = divtag.find('h1')
        if not h1tag:
            continue
        atag = h1tag.find('a', href=True)
        if not atag:
            continue
        url = self.url_prefix + '/news/todays-paper/' + atag['href']
        title = self.tag_to_string(atag, False)

        pubdate = ''
        description = ''
        ptag = divtag.find('p')
        if ptag:
            description = self.tag_to_string(ptag, False)

        author = ''
        autag = divtag.find('h4')
        if autag:
            author = self.tag_to_string(autag, False)

        # NOTE: was articles.has_key(key) -- has_key() is Python-2-only and
        # was removed in Python 3; 'in' is correct on both.
        if key not in articles:
            articles[key] = []
        articles[key].append(dict(title=title, url=url, date=pubdate,
            description=description, author=author, content=''))

    # Preserve on-page section order; drop sections with no articles.
    ans = [(key, articles[key]) for key in ans if key in articles]
    return ans
|
|
||||||
|
17
recipes/camera_di_commercio_di_bari.recipe
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1331729727(BasicNewsRecipe):
    """News from the Chamber of Commerce of Bari (feed43-generated RSS)."""

    # Identity
    title = u'Camera di Commercio di Bari'
    __author__ = 'faber1971'
    description = 'News from the Chamber of Commerce of Bari'
    language = 'it'
    masthead_url = 'http://www.ba.camcom.it/grafica/layout-bordo/logo_camcom_bari.png'

    # Fetch behaviour
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Camera di Commercio di Bari', u'http://feed43.com/4715147488845101.xml')]


__license__ = 'GPL v3'
__copyright__ = '2012, faber1971'
__version__ = 'v1.00'
__date__ = '17, April 2012'
|
@ -1,7 +1,5 @@
|
|||||||
__copyright__ = '2011, Pablo Aldama <pabloaldama at gmail.com>'
|
__copyright__ = '2011, Pablo Aldama <pabloaldama at gmail.com>'
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class AdvancedUserRecipe1311839910(BasicNewsRecipe):
|
class AdvancedUserRecipe1311839910(BasicNewsRecipe):
|
||||||
title = u'Caros Amigos'
|
title = u'Caros Amigos'
|
||||||
oldest_article = 20
|
oldest_article = 20
|
||||||
@ -9,9 +7,8 @@ class AdvancedUserRecipe1311839910(BasicNewsRecipe):
|
|||||||
language = 'pt_BR'
|
language = 'pt_BR'
|
||||||
__author__ = 'Pablo Aldama'
|
__author__ = 'Pablo Aldama'
|
||||||
|
|
||||||
feeds = [(u'Caros Amigos', u'http://carosamigos.terra.com.br/index/index.php?format=feed&type=rss')]
|
feeds = [(u'Caros Amigos', u'http://carosamigos.terra.com.br/index2/index.php?format=feed&type=rss')]
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':['blog']})
|
keep_only_tags = [dict(name='div', attrs={'class':['blog']})
|
||||||
,dict(name='div', attrs={'class':['blogcontent']})
|
,dict(name='div', attrs={'class':['blogcontent']})
|
||||||
]
|
]
|
||||||
remove_tags = [dict(name='div', attrs={'class':'addtoany'})]
|
remove_tags = [dict(name='div', attrs={'class':'addtoany'})]
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ class CD_Action(BasicNewsRecipe):
|
|||||||
description = 'cdaction.pl - polish games magazine site'
|
description = 'cdaction.pl - polish games magazine site'
|
||||||
category = 'games'
|
category = 'games'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
index='http://www.cdaction.pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
@ -17,4 +18,10 @@ class CD_Action(BasicNewsRecipe):
|
|||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
|
soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
|
||||||
self.cover_url='http://www.cdaction.pl'+ soup.find(id='wspolnik').div.a['href']
|
self.cover_url='http://www.cdaction.pl'+ soup.find(id='wspolnik').div.a['href']
|
||||||
return getattr(self, 'cover_url', self.cover_url)
|
return getattr(self, 'cover_url', self.cover_url)
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.index + a['href']
|
||||||
|
return soup
|
@ -1,4 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
class CGM(BasicNewsRecipe):
|
class CGM(BasicNewsRecipe):
|
||||||
title = u'CGM'
|
title = u'CGM'
|
||||||
@ -17,9 +18,9 @@ class CGM(BasicNewsRecipe):
|
|||||||
remove_tags_before=dict(id='mainContent')
|
remove_tags_before=dict(id='mainContent')
|
||||||
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
|
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
|
||||||
remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
|
remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
|
||||||
dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
|
dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
|
||||||
dict(id=['movieShare', 'container'])]
|
dict(id=['movieShare', 'container'])]
|
||||||
feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
|
feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
|
||||||
(u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')]
|
(u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')]
|
||||||
|
|
||||||
|
|
||||||
@ -33,10 +34,12 @@ class CGM(BasicNewsRecipe):
|
|||||||
img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
|
img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
|
||||||
gallery.contents[1].name='img'
|
gallery.contents[1].name='img'
|
||||||
gallery.contents[1]['src']=img
|
gallery.contents[1]['src']=img
|
||||||
|
pos = len(gallery.contents)
|
||||||
|
gallery.insert(pos, BeautifulSoup('<br />'))
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
ad=soup.findAll('a')
|
ad=soup.findAll('a')
|
||||||
for r in ad:
|
for r in ad:
|
||||||
if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']:
|
if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']:
|
||||||
r.extract()
|
r.extract()
|
||||||
return soup
|
return soup
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
|
from calibre import browser
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||||
title = u'Countryfile.com'
|
title = u'Countryfile.com'
|
||||||
cover_url = 'http://www.buysubscriptions.com/static_content/the-immediate/en/images/covers/CFIL_maxi.jpg'
|
#cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
|
||||||
__author__ = 'Dave Asbury'
|
__author__ = 'Dave Asbury'
|
||||||
description = 'The official website of Countryfile Magazine'
|
description = 'The official website of Countryfile Magazine'
|
||||||
# last updated 29/1/12
|
# last updated 15/4/12
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
@ -13,7 +14,23 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
#articles_are_obfuscated = True
|
#articles_are_obfuscated = True
|
||||||
|
def get_cover_url(self):
|
||||||
|
soup = self.index_to_soup('http://www.countryfile.com/')
|
||||||
|
cov = soup.find(attrs={'class' : 'imagecache imagecache-160px_wide imagecache-linked imagecache-160px_wide_linked'})
|
||||||
|
#print '******** ',cov,' ***'
|
||||||
|
cov2 = str(cov)
|
||||||
|
cov2=cov2[124:-90]
|
||||||
|
#print '******** ',cov2,' ***'
|
||||||
|
|
||||||
|
# try to get cover - if can't get known cover
|
||||||
|
br = browser()
|
||||||
|
br.set_handle_redirect(False)
|
||||||
|
try:
|
||||||
|
br.open_novisit(cov2)
|
||||||
|
cover_url = cov2
|
||||||
|
except:
|
||||||
|
cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
|
||||||
|
return cover_url
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
# dict(attrs={'class' : ['player']}),
|
# dict(attrs={'class' : ['player']}),
|
||||||
|
|
||||||
|
@ -1,20 +1,21 @@
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
import re
|
||||||
|
import mechanize
|
||||||
class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||||
title = u'The Daily Mirror'
|
title = u'The Daily Mirror'
|
||||||
description = 'News as provide by The Daily Mirror -UK'
|
description = 'News as provide by The Daily Mirror -UK'
|
||||||
|
|
||||||
__author__ = 'Dave Asbury'
|
__author__ = 'Dave Asbury'
|
||||||
# last updated 11/2/12
|
# last updated 7/4/12
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
|
#cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
|
||||||
cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
|
|
||||||
|
|
||||||
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
|
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
|
||||||
|
|
||||||
|
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 5
|
max_articles_per_feed = 10
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -75,3 +76,28 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
|||||||
img { display:block}
|
img { display:block}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
|
||||||
|
# look for the block containing the mirror button and url
|
||||||
|
cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_92.gif);'})
|
||||||
|
cov2 = str(cov)
|
||||||
|
cov2='http://www.politicshome.com'+cov2[9:-142]
|
||||||
|
#cov2 now contains url of the page containing pic
|
||||||
|
soup = self.index_to_soup(cov2)
|
||||||
|
cov = soup.find(attrs={'id' : 'large'})
|
||||||
|
cov2 = str(cov)
|
||||||
|
cov2=cov2[27:-18]
|
||||||
|
#cov2 now is pic url, now go back to original function
|
||||||
|
br = mechanize.Browser()
|
||||||
|
br.set_handle_redirect(False)
|
||||||
|
try:
|
||||||
|
br.open_novisit(cov2)
|
||||||
|
cover_url = cov2
|
||||||
|
except:
|
||||||
|
cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
|
||||||
|
|
||||||
|
#cover_url = cov2
|
||||||
|
#cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
|
||||||
|
return cover_url
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
|||||||
cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
|
cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
|
||||||
description = u'Aktualności i blogi z dobreprogramy.pl'
|
description = u'Aktualności i blogi z dobreprogramy.pl'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
index='http://www.dobreprogramy.pl/'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
extra_css = '.title {font-size:22px;}'
|
extra_css = '.title {font-size:22px;}'
|
||||||
@ -22,3 +23,10 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
|||||||
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
|
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
|
||||||
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
|
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
|
||||||
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
|
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.index + a['href']
|
||||||
|
return soup
|
@ -7,6 +7,7 @@ class Dzieje(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
|
cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
|
||||||
category = 'history'
|
category = 'history'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
index='http://dzieje.pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
remove_javascript=True
|
remove_javascript=True
|
||||||
@ -15,3 +16,10 @@ class Dzieje(BasicNewsRecipe):
|
|||||||
remove_tags_after= dict(id='dogory')
|
remove_tags_after= dict(id='dogory')
|
||||||
remove_tags=[dict(id='dogory')]
|
remove_tags=[dict(id='dogory')]
|
||||||
feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
|
feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.index + a['href']
|
||||||
|
return soup
|
16
recipes/editoriali.recipe
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
__version__ = 'v1.0'
__date__ = '7, April 2012'

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1332847053(BasicNewsRecipe):
    """Leading articles (editorials) on Italy from several Italian papers."""

    # Identity
    title = u'Editoriali'
    __author__ = 'faber1971'
    description = 'Leading articles on Italy by the best Italian editorials'
    masthead_url = 'http://folkbulletin.folkest.com/wp-content/uploads/editoriale1.jpg'

    # Fetch/conversion behaviour
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = True
    conversion_options = {'linearize_tables': True}

    feeds = [(u'Micromega', u'http://temi.repubblica.it/micromega-online/feed/'), (u'Corriere della Sera', u'http://xml.corriereobjects.it/rss/editoriali.xml'), (u'La Stampa', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=25'), (u"Italia dall'estero", u'http://italiadallestero.info/feed')]
|
@ -21,3 +21,8 @@ class eioba(BasicNewsRecipe):
|
|||||||
(u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
|
(u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
|
||||||
(u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml')
|
(u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
class Elektroda(BasicNewsRecipe):
|
class Elektroda(BasicNewsRecipe):
|
||||||
title = u'Elektroda'
|
title = u'Elektroda'
|
||||||
@ -13,3 +14,18 @@ class Elektroda(BasicNewsRecipe):
|
|||||||
remove_tags_after=dict(name='td', attrs={'class':'spaceRow'})
|
remove_tags_after=dict(name='td', attrs={'class':'spaceRow'})
|
||||||
remove_tags=[dict(name='a', attrs={'href':'#top'})]
|
remove_tags=[dict(name='a', attrs={'href':'#top'})]
|
||||||
feeds = [(u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php')]
|
feeds = [(u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php')]
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
tag=soup.find('span', attrs={'class':'postbody'})
|
||||||
|
if tag:
|
||||||
|
pos = len(tag.contents)
|
||||||
|
tag.insert(pos, BeautifulSoup('<br />'))
|
||||||
|
return soup
|
||||||
|
|
||||||
|
def parse_feeds (self):
|
||||||
|
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
|
for feed in feeds:
|
||||||
|
for article in feed.articles[:]:
|
||||||
|
article.title=article.title[article.title.find("::")+3:]
|
||||||
|
return feeds
|
||||||
|
@ -7,6 +7,7 @@ class eMuzyka(BasicNewsRecipe):
|
|||||||
description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
|
description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
|
||||||
category = 'music'
|
category = 'music'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
index='http://www.emuzyka.pl'
|
||||||
cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
|
cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
@ -14,3 +15,9 @@ class eMuzyka(BasicNewsRecipe):
|
|||||||
keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
|
keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
|
||||||
remove_tags=[dict(name='span', attrs={'id':'date'})]
|
remove_tags=[dict(name='span', attrs={'id':'date'})]
|
||||||
feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]
|
feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.index + a['href']
|
||||||
|
return soup
|
@ -3,10 +3,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||||
title = u'FHM UK'
|
title = u'FHM UK'
|
||||||
description = 'Good News for Men'
|
description = 'Good News for Men'
|
||||||
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
|
cover_url = 'http://www.greatmagazines.co.uk/covers/large/w197/current/fhm.jpg'
|
||||||
|
# cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
|
||||||
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
|
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
|
||||||
__author__ = 'Dave Asbury'
|
__author__ = 'Dave Asbury'
|
||||||
# last updated 27/1/12
|
# last updated 14/4/12
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
oldest_article = 28
|
oldest_article = 28
|
||||||
max_articles_per_feed = 12
|
max_articles_per_feed = 12
|
||||||
@ -27,8 +28,11 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|||||||
|
|
||||||
#]
|
#]
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
|
(u'From the Homepage',u'http://feed43.com/0032328550253453.xml'),
|
||||||
|
#http://feed43.com/8053226782885416.xml'),
|
||||||
(u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
|
(u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
|
||||||
(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
|
(u'Upgrade',u'http://feed43.com/0877305847443234.xml'),
|
||||||
(u'Gaming',u'http://feed43.com/0755006465351035.xml'),
|
#(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
|
||||||
]
|
#(u'Gaming',u'http://feed43.com/0755006465351035.xml'),
|
||||||
|
(u'Gaming',u'http://feed43.com/6537162612465672.xml'),
|
||||||
|
]
|
||||||
|
@ -7,13 +7,14 @@ class Filmweb_pl(BasicNewsRecipe):
|
|||||||
cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png'
|
cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png'
|
||||||
category = 'movies'
|
category = 'movies'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
index='http://www.filmweb.pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
remove_empty_feeds=True
|
remove_empty_feeds=True
|
||||||
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
||||||
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
|
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
|
||||||
keep_only_tags= [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]
|
keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
|
||||||
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
|
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
|
||||||
(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
|
(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
|
||||||
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
|
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
|
||||||
@ -39,3 +40,9 @@ class Filmweb_pl(BasicNewsRecipe):
|
|||||||
self.log.warn(skip_tag)
|
self.log.warn(skip_tag)
|
||||||
return self.index_to_soup(skip_tag['href'], raw=True)
|
return self.index_to_soup(skip_tag['href'], raw=True)
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.index + a['href']
|
||||||
|
return soup
|
@ -1,39 +1,49 @@
|
|||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Forbes(BasicNewsRecipe):
|
class Forbes(BasicNewsRecipe):
|
||||||
title = u'Forbes'
|
title = u'Forbes'
|
||||||
description = 'Business and Financial News'
|
description = 'Business and Financial News'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Kovid Goyal'
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 20
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
recursions = 1
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
html2lrf_options = ['--base-font-size', '10']
|
|
||||||
|
|
||||||
cover_url = u'http://www.forbes.com/media/current_covers/forbes_120_160.gif'
|
cover_url = u'http://www.forbes.com/media/current_covers/forbes_120_160.gif'
|
||||||
|
|
||||||
feeds = [(u'Latest', u'http://www.forbes.com/news/index.xml'),
|
|
||||||
(u'Most Popular', u'http://www.forbes.com/feeds/popstories.xml'),
|
|
||||||
(u'Most Emailed', u'http://www.forbes.com/feeds/mostemailed.xml'),
|
|
||||||
(u'Faces', u'http://www.forbes.com/facesscan/index.xml'),
|
|
||||||
(u'Technology', u'http://www.forbes.com/technology/index.xml'),
|
|
||||||
(u'Personal Tech', u'http://www.forbes.com/personaltech/index.xml'),
|
|
||||||
(u'Wireless', u'http://www.forbes.com/wireless/index.xml'),
|
|
||||||
(u'Business', u'http://www.forbes.com/business/index.xml'),
|
|
||||||
(u'Sports Money', u'http://www.forbes.com/sportsmoney/index.xml'),
|
|
||||||
(u'Sports', u'http://www.forbes.com/forbeslife/sports/index.xml'),
|
|
||||||
(u'Vehicles', u'http://www.forbes.com/forbeslife/vehicles/index.xml'),
|
|
||||||
(u'Leadership', u'http://www.forbes.com/leadership/index.xml'),
|
|
||||||
(u'Careers', u'http://www.forbes.com/leadership/careers/index.xml'),
|
|
||||||
(u'Compensation', u'http://www.forbes.com/leadership/compensation/index.xml'),
|
|
||||||
(u'Managing', u'http://www.forbes.com/leadership/managing/index.xml')]
|
|
||||||
|
|
||||||
def print_version(self, url):
|
feeds = [(u'Latest', u'http://www.forbes.com/news/index.xml'),
|
||||||
raw = self.browser.open(url).read()
|
(u'Most Popular', u'http://www.forbes.com/feeds/popstories.xml'),
|
||||||
soup = BeautifulSoup(raw.decode('latin1', 'replace'))
|
(u'Technology', u'http://www.forbes.com/technology/index.xml'),
|
||||||
print_link = soup.find('a', {'onclick':"s_linkTrackVars='prop18';s_linkType='o';s_linkName='Print';if(typeof(globalPageName)!='undefined')s_prop18=globalPageName;s_lnk=s_co(this);s_gs(s_account);"})
|
(u'Business', u'http://www.forbes.com/business/index.xml'),
|
||||||
if print_link is None:
|
(u'Sports Money', u'http://www.forbes.com/sportsmoney/index.xml'),
|
||||||
return ''
|
(u'Leadership', u'http://www.forbes.com/leadership/index.xml'),]
|
||||||
return 'http://www.forbes.com' + print_link['href']
|
|
||||||
|
keep_only_tags = \
|
||||||
|
{'class':lambda x: x and (set(x.split()) & {'body', 'pagination',
|
||||||
|
'articleHead', 'article_head'})}
|
||||||
|
remove_tags_before = {'name':'h1'}
|
||||||
|
remove_tags = [
|
||||||
|
{'class':['comment_bug', 'engagement_block',
|
||||||
|
'video_promo_block', 'article_actions']},
|
||||||
|
{'id':'comments'}
|
||||||
|
]
|
||||||
|
|
||||||
|
def is_link_wanted(self, url, tag):
|
||||||
|
ans = re.match(r'http://.*/[2-9]/', url) is not None
|
||||||
|
if ans:
|
||||||
|
self.log('Following multipage link: %s'%url)
|
||||||
|
return ans
|
||||||
|
|
||||||
|
def postprocess_html(self, soup, first_fetch):
|
||||||
|
for pag in soup.findAll(True, 'pagination'):
|
||||||
|
pag.extract()
|
||||||
|
if not first_fetch:
|
||||||
|
h1 = soup.find('h1')
|
||||||
|
if h1 is not None:
|
||||||
|
h1.extract()
|
||||||
|
return soup
|
||||||
|
|
||||||
|
16
recipes/fotoblogia_pl.recipe
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe


class Fotoblogia_pl(BasicNewsRecipe):
    """Polish photography blog fotoblogia.pl, via its RSS feed."""

    # Identity
    title = u'Fotoblogia.pl'
    __author__ = 'fenuks'
    category = 'photography'
    language = 'pl'
    masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg'
    cover_url = 'http://fotoblogia.pl/images/2009/03/fotoblogia2.jpg'

    # Fetch behaviour
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Keep only the post body; drop link-outs, category and tag boxes.
    keep_only_tags = [dict(name='div', attrs={'class': 'post-view post-standard'})]
    remove_tags = [dict(attrs={'class': ['external fotoblogia', 'categories', 'tags']})]

    feeds = [(u'Wszystko', u'http://fotoblogia.pl/feed/rss2')]
|
@ -6,16 +6,24 @@ class Gameplay_pl(BasicNewsRecipe):
|
|||||||
description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
|
description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
|
||||||
category = 'games, movies, books, music'
|
category = 'games, movies, books, music'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
index='http://gameplay.pl'
|
||||||
masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
|
masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
|
||||||
cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
|
cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
remove_javascript= True
|
||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
|
keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
|
||||||
remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im']})]
|
remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi']}), dict(attrs={'usemap':'#map'})]
|
||||||
feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
|
feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
|
||||||
|
|
||||||
def image_url_processor(self, baseurl, url):
|
def image_url_processor(self, baseurl, url):
|
||||||
if 'http' not in url:
|
if 'http' not in url:
|
||||||
return 'http://gameplay.pl'+ url[2:]
|
return 'http://gameplay.pl'+ url[2:]
|
||||||
else:
|
else:
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and '../' in a['href']:
|
||||||
|
a['href']=self.index + a['href'][2:]
|
||||||
|
return soup
|
@ -9,6 +9,7 @@ class Gildia(BasicNewsRecipe):
|
|||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
remove_empty_feeds=True
|
||||||
no_stylesheets=True
|
no_stylesheets=True
|
||||||
remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
|
remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
|
||||||
keep_only_tags=dict(name='div', attrs={'class':'widetext'})
|
keep_only_tags=dict(name='div', attrs={'class':'widetext'})
|
||||||
@ -24,3 +25,16 @@ class Gildia(BasicNewsRecipe):
|
|||||||
self.log.warn('odnosnik')
|
self.log.warn('odnosnik')
|
||||||
self.log.warn(link['href'])
|
self.log.warn(link['href'])
|
||||||
return self.index_to_soup(link['href'], raw=True)
|
return self.index_to_soup(link['href'], raw=True)
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
if '/gry/' in a['href']:
|
||||||
|
a['href']='http://www.gry.gildia.pl' + a['href']
|
||||||
|
elif u'książk' in soup.title.string.lower() or u'komiks' in soup.title.string.lower():
|
||||||
|
a['href']='http://www.literatura.gildia.pl' + a['href']
|
||||||
|
elif u'komiks' in soup.title.string.lower():
|
||||||
|
a['href']='http://www.literatura.gildia.pl' + a['href']
|
||||||
|
else:
|
||||||
|
a['href']='http://www.gildia.pl' + a['href']
|
||||||
|
return soup
|
||||||
|
@ -7,14 +7,15 @@ class Gram_pl(BasicNewsRecipe):
|
|||||||
category = 'games'
|
category = 'games'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
|
index='http://www.gram.pl'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
extra_css = 'h2 {font-style: italic; font-size:20px;}'
|
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
|
||||||
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
||||||
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
|
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
|
||||||
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
|
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
|
||||||
feeds = [(u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
|
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
|
||||||
(u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
|
(u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
|
||||||
|
|
||||||
def parse_feeds (self):
|
def parse_feeds (self):
|
||||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
@ -23,3 +24,36 @@ class Gram_pl(BasicNewsRecipe):
|
|||||||
if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
|
if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
|
||||||
feed.articles.remove(article)
|
feed.articles.remove(article)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
def append_page(self, soup, appendtag):
|
||||||
|
nexturl = appendtag.find('a', attrs={'class':'cpn'})
|
||||||
|
while nexturl:
|
||||||
|
soup2 = self.index_to_soup('http://www.gram.pl'+ nexturl['href'])
|
||||||
|
r=appendtag.find(id='pgbox')
|
||||||
|
if r:
|
||||||
|
r.extract()
|
||||||
|
pagetext = soup2.find(attrs={'class':'main'})
|
||||||
|
r=pagetext.find('h1')
|
||||||
|
if r:
|
||||||
|
r.extract()
|
||||||
|
r=pagetext.find('h2')
|
||||||
|
if r:
|
||||||
|
r.extract()
|
||||||
|
for r in pagetext.findAll('script'):
|
||||||
|
r.extract()
|
||||||
|
pos = len(appendtag.contents)
|
||||||
|
appendtag.insert(pos, pagetext)
|
||||||
|
nexturl = appendtag.find('a', attrs={'class':'cpn'})
|
||||||
|
r=appendtag.find(id='pgbox')
|
||||||
|
if r:
|
||||||
|
r.extract()
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
self.append_page(soup, soup.body)
|
||||||
|
tag=soup.findAll(name='div', attrs={'class':'picbox'})
|
||||||
|
for t in tag:
|
||||||
|
t['style']='float: left;'
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.index + a['href']
|
||||||
|
return soup
|
@ -59,6 +59,7 @@ class heiseDe(BasicNewsRecipe):
|
|||||||
dict(name='span', attrs={'class':'rsaquo'}),
|
dict(name='span', attrs={'class':'rsaquo'}),
|
||||||
dict(name='div', attrs={'class':'news_logo'}),
|
dict(name='div', attrs={'class':'news_logo'}),
|
||||||
dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
|
dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
|
||||||
|
dict(name='div', attrs={'class':'navi_top_container'}),
|
||||||
dict(name='p', attrs={'class':'news_option'}),
|
dict(name='p', attrs={'class':'news_option'}),
|
||||||
dict(name='p', attrs={'class':'news_navi'}),
|
dict(name='p', attrs={'class':'news_navi'}),
|
||||||
dict(name='div', attrs={'class':'news_foren'})]
|
dict(name='div', attrs={'class':'news_foren'})]
|
||||||
@ -69,3 +70,5 @@ class heiseDe(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ class HighCountryNews(BasicNewsRecipe):
|
|||||||
__author__ = 'Armin Geller' # 2012-01-31
|
__author__ = 'Armin Geller' # 2012-01-31
|
||||||
publisher = 'High Country News'
|
publisher = 'High Country News'
|
||||||
timefmt = ' [%a, %d %b %Y]'
|
timefmt = ' [%a, %d %b %Y]'
|
||||||
language = 'en-Us'
|
language = 'en'
|
||||||
encoding = 'UTF-8'
|
encoding = 'UTF-8'
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
|
20
recipes/historia_news.recipe
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class historia_news(BasicNewsRecipe):
|
||||||
|
title = u'historia-news'
|
||||||
|
__author__ = 'fenuks'
|
||||||
|
description = u'Historia-news to portal dla ludzi kochających historię. Najnowsze newsy z historii bliższej i dalszej, archeologii, paleontologii oraz ciekawostki i podcasty z historii kultury, sportu, motoryzacji i inne.'
|
||||||
|
masthead_url = 'http://historia-news.pl/templates/hajak4/images/header.jpg'
|
||||||
|
cover_url= 'http://www.historia-news.pl/templates/hajak4/images/header.jpg'
|
||||||
|
category = 'history'
|
||||||
|
language = 'pl'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
remove_tags=[dict(name='form'), dict(name='img', attrs={'alt':'Print'}), dict(attrs={'class':['commbutt', 'cpr']}), dict(id=['plusone', 'facebook'])]
|
||||||
|
feeds = [(u'Wiadomo\u015bci', u'http://historia-news.pl/wiadomoci.feed?type=rss'), (u'Artyku\u0142y', u'http://historia-news.pl/artykuy.feed?type=rss')]
|
||||||
|
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url + '?tmpl=component&print=1&layout=default&page='
|
Before Width: | Height: | Size: 413 B After Width: | Height: | Size: 1.5 KiB |
BIN
recipes/icons/ba_herald.png
Normal file
After Width: | Height: | Size: 978 B |
BIN
recipes/icons/fotoblogia_pl.png
Normal file
After Width: | Height: | Size: 379 B |
BIN
recipes/icons/historia_news.png
Normal file
After Width: | Height: | Size: 833 B |
BIN
recipes/icons/swiat_obrazu.png
Normal file
After Width: | Height: | Size: 1006 B |
BIN
recipes/icons/telam.png
Normal file
After Width: | Height: | Size: 1.9 KiB |
@ -8,6 +8,7 @@ class in4(BasicNewsRecipe):
|
|||||||
description = u'Serwis Informacyjny - Aktualnosci, recenzje'
|
description = u'Serwis Informacyjny - Aktualnosci, recenzje'
|
||||||
category = 'IT'
|
category = 'IT'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
index='http://www.in4.pl/'
|
||||||
#cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg'
|
#cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
@ -39,6 +40,7 @@ class in4(BasicNewsRecipe):
|
|||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
self.append_page(soup, soup.body)
|
self.append_page(soup, soup.body)
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.index + a['href']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ class INFRA(BasicNewsRecipe):
|
|||||||
description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
|
description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
|
||||||
cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
|
cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
|
||||||
category = 'UFO'
|
category = 'UFO'
|
||||||
|
index='http://infra.org.pl'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheers=True
|
no_stylesheers=True
|
||||||
@ -15,3 +16,11 @@ class INFRA(BasicNewsRecipe):
|
|||||||
remove_tags_after=dict(attrs={'class':'pagenav'})
|
remove_tags_after=dict(attrs={'class':'pagenav'})
|
||||||
remove_tags=[dict(attrs={'class':'pagenav'})]
|
remove_tags=[dict(attrs={'class':'pagenav'})]
|
||||||
feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]
|
feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.index + a['href']
|
||||||
|
return soup
|
43
recipes/ivanamilakovic.recipe
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
ivanamilakovic.blogspot.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class IvanaMilakovic(BasicNewsRecipe):
|
||||||
|
title = u'Ivana Milaković'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = u'Hronika mačijeg škrabala - priče, inspiracija, knjige, pisanje, prevodi...'
|
||||||
|
oldest_article = 80
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
language = 'sr'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = True
|
||||||
|
publication_type = 'blog'
|
||||||
|
extra_css = """
|
||||||
|
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||||
|
body{font-family: Arial,Tahoma,Helvetica,FreeSans,sans1,sans-serif}
|
||||||
|
img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px }
|
||||||
|
"""
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : 'knjige, blog, srbija, sf'
|
||||||
|
, 'publisher': 'Ivana Milakovic'
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
|
feeds = [(u'Posts', u'http://ivanamilakovic.blogspot.com/feeds/posts/default')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return self.adeify_images(soup)
|
34
recipes/jakarta_globe.recipe
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class JakartaGlobe(BasicNewsRecipe):
|
||||||
|
title = u'Jakarta Globe'
|
||||||
|
oldest_article = 3
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'News', u'http://www.thejakartaglobe.com/pages/getrss/getrss-news.php'),
|
||||||
|
(u'Business', u'http://www.thejakartaglobe.com/pages/getrss/getrss-business.php'),
|
||||||
|
(u'Technology', u'http://www.thejakartaglobe.com/pages/getrss/getrss-tech.php'),
|
||||||
|
(u'My Jakarta', u'http://www.thejakartaglobe.com/pages/getrss/getrss-myjakarta.php'),
|
||||||
|
(u'International', u'http://www.thejakartaglobe.com/pages/getrss/getrss-international.php'),
|
||||||
|
(u'Life and Times', u'http://www.thejakartaglobe.com/pages/getrss/getrss-lifeandtimes.php'),
|
||||||
|
]
|
||||||
|
__author__ = 'rty'
|
||||||
|
pubisher = 'JakartaGlobe.com'
|
||||||
|
description = 'JakartaGlobe, Indonesia, Newspaper'
|
||||||
|
category = 'News, Indonesia'
|
||||||
|
|
||||||
|
|
||||||
|
remove_javascript = True
|
||||||
|
use_embedded_content = False
|
||||||
|
no_stylesheets = True
|
||||||
|
language = 'en_ID'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
conversion_options = {'linearize_tables':True}
|
||||||
|
masthead_url = 'http://www.thejakartaglobe.com/pages/2010/images/jak-globe-logo.jpg'
|
||||||
|
keep_only_tags = [
|
||||||
|
dict(name='div', attrs={'class':'story'}),
|
||||||
|
dict(name='span', attrs={'class':'headline'}),
|
||||||
|
dict(name='div', attrs={'class':'story'}),
|
||||||
|
dict(name='p', attrs={'id':'bodytext'})
|
||||||
|
]
|
42
recipes/klubknjige.recipe
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
klub-knjige.blogspot.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class KlubKnjige(BasicNewsRecipe):
|
||||||
|
title = 'Klub knjige'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = 'literarni blog'
|
||||||
|
oldest_article = 30
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
language = 'sr'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = True
|
||||||
|
publication_type = 'blog'
|
||||||
|
extra_css = """
|
||||||
|
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||||
|
body{font-family: Arial,Tahoma,Helvetica,FreeSans,sans1,sans-serif}
|
||||||
|
img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px }
|
||||||
|
"""
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : 'knjige, blog, srbija, sf'
|
||||||
|
, 'publisher': 'Klub Knjige'
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
|
feeds = [(u'Posts', u'http://klub-knjige.blogspot.com/feeds/posts/default')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return self.adeify_images(soup)
|
@ -1,5 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
class Konflikty(BasicNewsRecipe):
|
class Konflikty(BasicNewsRecipe):
|
||||||
title = u'Konflikty Zbrojne'
|
title = u'Konflikty Zbrojne'
|
||||||
@ -10,6 +11,23 @@ class Konflikty(BasicNewsRecipe):
|
|||||||
category='military, history'
|
category='military, history'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
auto_cleanup = True
|
no_stylesheets = True
|
||||||
|
keep_only_tags=[dict(attrs={'class':['title1', 'image']}), dict(id='body')]
|
||||||
|
|
||||||
feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'), (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'), (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'), (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml')]
|
feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
|
||||||
|
(u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'),
|
||||||
|
(u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'),
|
||||||
|
(u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'),
|
||||||
|
(u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
|
||||||
|
(u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
|
||||||
|
(u'Teksty źródłowe', u'http://www.konflikty.pl/rss_tekstyzrodlowe_10.xml')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for image in soup.findAll(name='a', attrs={'class':'image'}):
|
||||||
|
if image.img and image.img.has_key('alt'):
|
||||||
|
image.name='div'
|
||||||
|
pos = len(image.contents)
|
||||||
|
image.insert(pos, BeautifulSoup('<p style="font-style:italic;">'+image.img['alt']+'</p>'))
|
||||||
|
return soup
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011'
|
__copyright__ = '2012'
|
||||||
'''
|
'''
|
||||||
lemonde.fr
|
lemonde.fr
|
||||||
'''
|
'''
|
||||||
@ -25,7 +25,7 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
.ariane{font-size:xx-small;}
|
.ariane{font-size:xx-small;}
|
||||||
.source{font-size:xx-small;}
|
.source{font-size:xx-small;}
|
||||||
#.href{font-size:xx-small;}
|
#.href{font-size:xx-small;}
|
||||||
.LM_caption{color:#666666; font-size:x-small;}
|
#.figcaption style{color:#666666; font-size:x-small;}
|
||||||
#.main-article-info{font-family:Arial,Helvetica,sans-serif;}
|
#.main-article-info{font-family:Arial,Helvetica,sans-serif;}
|
||||||
#full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
|
#full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
|
||||||
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
|
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
|
||||||
@ -48,7 +48,7 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
if alink.string is not None:
|
if alink.string is not None:
|
||||||
tstr = alink.string
|
tstr = alink.string
|
||||||
alink.replaceWith(tstr)
|
alink.replaceWith(tstr)
|
||||||
return self.adeify_images(soup)
|
return soup
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'([0-9])%'), lambda m: m.group(1) + ' %'),
|
(re.compile(r'([0-9])%'), lambda m: m.group(1) + ' %'),
|
||||||
@ -61,6 +61,11 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
(re.compile(r'”'), lambda match: ' »)'),
|
(re.compile(r'”'), lambda match: ' »)'),
|
||||||
(re.compile(r'>\''), lambda match: '>‘'),
|
(re.compile(r'>\''), lambda match: '>‘'),
|
||||||
(re.compile(r' \''), lambda match: ' ‘'),
|
(re.compile(r' \''), lambda match: ' ‘'),
|
||||||
|
(re.compile(r' "'), lambda match: ' « '),
|
||||||
|
(re.compile(r'>"'), lambda match: '>« '),
|
||||||
|
(re.compile(r'"<'), lambda match: ' »<'),
|
||||||
|
(re.compile(r'" '), lambda match: ' » '),
|
||||||
|
(re.compile(r'",'), lambda match: ' »,'),
|
||||||
(re.compile(r'\''), lambda match: '’'),
|
(re.compile(r'\''), lambda match: '’'),
|
||||||
(re.compile(r'"<em>'), lambda match: '<em>« '),
|
(re.compile(r'"<em>'), lambda match: '<em>« '),
|
||||||
(re.compile(r'"<em>"</em><em>'), lambda match: '<em>« '),
|
(re.compile(r'"<em>"</em><em>'), lambda match: '<em>« '),
|
||||||
@ -86,9 +91,10 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
(re.compile(r'\s»'), lambda match: ' »'),
|
(re.compile(r'\s»'), lambda match: ' »'),
|
||||||
(re.compile(r'«\s'), lambda match: '« '),
|
(re.compile(r'«\s'), lambda match: '« '),
|
||||||
(re.compile(r' %'), lambda match: ' %'),
|
(re.compile(r' %'), lambda match: ' %'),
|
||||||
(re.compile(r'\.jpg » border='), lambda match: '.jpg'),
|
(re.compile(r'\.jpg » width='), lambda match: '.jpg'),
|
||||||
(re.compile(r'\.png » border='), lambda match: '.png'),
|
(re.compile(r'\.png » width='), lambda match: '.png'),
|
||||||
(re.compile(r' – '), lambda match: ' – '),
|
(re.compile(r' – '), lambda match: ' – '),
|
||||||
|
(re.compile(r'figcaption style="display:none"'), lambda match: 'figcaption'),
|
||||||
(re.compile(r' – '), lambda match: ' – '),
|
(re.compile(r' – '), lambda match: ' – '),
|
||||||
(re.compile(r' - '), lambda match: ' – '),
|
(re.compile(r' - '), lambda match: ' – '),
|
||||||
(re.compile(r' -,'), lambda match: ' –,'),
|
(re.compile(r' -,'), lambda match: ' –,'),
|
||||||
@ -97,10 +103,15 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':['contenu']})
|
dict(name='div', attrs={'class':['global']})
|
||||||
]
|
]
|
||||||
remove_tags = [dict(name='div', attrs={'class':['LM_atome']})]
|
|
||||||
remove_tags_after = [dict(id='appel_temoignage')]
|
remove_tags = [
|
||||||
|
dict(name='div', attrs={'class':['bloc_base meme_sujet']}),
|
||||||
|
dict(name='p', attrs={'class':['lire']})
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags_after = [dict(id='fb-like')]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
url = article.get('guid', None)
|
url = article.get('guid', None)
|
||||||
@ -136,4 +147,3 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
cover_url = link_item.img['src']
|
cover_url = link_item.img['src']
|
||||||
|
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
|
12
recipes/liberatorio_politico.recipe
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1334649829(BasicNewsRecipe):
|
||||||
|
title = u'Liberatorio Politico'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
auto_cleanup = True
|
||||||
|
masthead_url = 'http://liberatorio.altervista.org/wp-content/uploads/2012/01/Testata-LIBERATORIO-Altervista1.jpg'
|
||||||
|
feeds = [(u'Liberatorio Politico', u'http://liberatorio.altervista.org/feed/')]
|
||||||
|
__author__ = 'faber1971'
|
||||||
|
description = 'Inquiry journalism - a blog on Molfetta, Land of Bari, Apulia and Italy - v1.00 (07, April 2012)'
|
||||||
|
language = 'it'
|
50
recipes/limes.recipe
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, faber1971'
|
||||||
|
__version__ = 'v1.00'
|
||||||
|
__date__ = '16, April 2012'
|
||||||
|
__description__ = 'Geopolitical Italian magazine'
|
||||||
|
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Limes(BasicNewsRecipe):
|
||||||
|
description = 'Italian weekly magazine'
|
||||||
|
__author__ = 'faber1971'
|
||||||
|
|
||||||
|
cover_url = 'http://temi.repubblica.it/UserFiles/limes/Image/Loghi/logo-limes.gif'
|
||||||
|
title = 'Limes'
|
||||||
|
category = 'Geopolitical news'
|
||||||
|
|
||||||
|
language = 'it'
|
||||||
|
# encoding = 'cp1252'
|
||||||
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
|
|
||||||
|
oldest_article = 16
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
use_embedded_content = False
|
||||||
|
recursion = 10
|
||||||
|
|
||||||
|
remove_javascript = True
|
||||||
|
no_stylesheets = True
|
||||||
|
masthead_url = 'http://temi.repubblica.it/UserFiles/limes/Image/Loghi/logo-limes.gif'
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'Limes', u'http://temi.repubblica.it/limes/feed/')
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
keep_only_tags = [
|
||||||
|
dict(name='div', attrs={'class':['testo','copertina','occhiello','firma','didascalia','content-second-right','detail-articles','titolo-local','generic-articles']}),
|
||||||
|
dict(name='div', attrs={'class':['generic-articles','summary','detail-articles']}),
|
||||||
|
dict(name='div', attrs={'id':['content-second-right','content2']})
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='div',attrs={'class':['servizi','aggiungi','label-web','bottom-mobile','box-abbonamenti','box-cerca','big','little','stampaweb']}),
|
||||||
|
dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left','menutext']}),
|
||||||
|
dict(name='ul',attrs={'id':'user-utility'}),
|
||||||
|
dict(name=['script','noscript','iframe'])
|
||||||
|
]
|
||||||
|
|
@ -1,4 +1,6 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'faber1971'
|
||||||
|
description = 'Collection of Italian marketing websites - v1.04 (17, March 2012)'
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
@ -9,12 +11,9 @@ class AdvancedUserRecipe1327062445(BasicNewsRecipe):
|
|||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
conversion_options = {'linearize_tables': True}
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='ul', attrs={'id':'ads0'})
|
dict(name='ul', attrs={'id':'ads0'})
|
||||||
]
|
]
|
||||||
masthead_url = 'http://www.simrendeogun.com/wp-content/uploads/2011/06/New-Marketing-Magazine-Logo.jpg'
|
masthead_url = 'http://www.simrendeogun.com/wp-content/uploads/2011/06/New-Marketing-Magazine-Logo.jpg'
|
||||||
__author__ = 'faber1971'
|
feeds = [(u'My Marketing', u'http://feed43.com/0537744466058428.xml'), (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'), (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'), (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'), (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'), (u'Brandforum news', u'http://www.brandforum.it/rss/news'), (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'), (u'MarketingArena', u'http://feeds.feedburner.com/marketingarena'), (u'minimarketing', u'http://feeds.feedburner.com/minimarketingit'), (u'Marketing Journal', u'http://feeds.feedburner.com/marketingjournal/jPwA'), (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/')]
|
||||||
description = 'Collection of Italian marketing websites - v1.03 (20, February 2012)'
|
|
||||||
language = 'it'
|
|
||||||
|
|
||||||
feeds = [(u'My Marketing', u'http://feed43.com/0537744466058428.xml'), (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'), (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'), (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'), (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'), (u'Brandforum news', u'http://www.brandforum.it/rss/news'), (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'), (u'MarketingArena', u'http://feeds.feedburner.com/marketingarena'), (u'minimarketing', u'http://feeds.feedburner.com/minimarketingit'), (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/')]
|
|
||||||
|
85
recipes/melbourne_herald_sun.recipe
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Matthew Briggs'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
http://www.herald sun.com.au/
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class DailyTelegraph(BasicNewsRecipe):
|
||||||
|
title = u'Melbourne Herald Sun'
|
||||||
|
__author__ = u'Ray Hartley'
|
||||||
|
description = (u'Victorian and National News'
|
||||||
|
'. You will need to have a subscription to '
|
||||||
|
'http://www.heraldsun.com.au to get full articles.')
|
||||||
|
language = 'en_AU'
|
||||||
|
|
||||||
|
oldest_article = 2
|
||||||
|
needs_subscription = 'optional'
|
||||||
|
max_articles_per_feed = 30
|
||||||
|
remove_javascript = True
|
||||||
|
no_stylesheets = True
|
||||||
|
encoding = 'utf8'
|
||||||
|
use_embedded_content = False
|
||||||
|
language = 'en_AU'
|
||||||
|
remove_empty_feeds = True
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
masthead_url = 'http://resources2.news.com.au/cs/heraldsun/images/header-and-footer/logo.gif'
|
||||||
|
extra_css = """
|
||||||
|
body{font-family: Arial,Helvetica,sans-serif }
|
||||||
|
img{margin-bottom: 0.4em; display:block}
|
||||||
|
.caption{display: inline; font-size: x-small}
|
||||||
|
"""
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
|
keep_only_tags = [dict(attrs={'id':'story'})]
|
||||||
|
remove_tags_before=dict(attrs={'class':'story-header'})
|
||||||
|
remove_tags_after=dict(attrs={'class':'story-footer'})
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['meta','link','base','iframe','embed','object','media-metadata','media-reference','media-producer'])
|
||||||
|
,dict(attrs={'class':['story-header-tools','story-sidebar','story-footer','story-summary-list']})
|
||||||
|
]
|
||||||
|
remove_attributes=['lang']
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [(u'Breaking News' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_breakingnews_206.xml' )
|
||||||
|
,(u'Business' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_business_207.xml' )
|
||||||
|
,(u'Entertainment' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_entertainment_208.xml' )
|
||||||
|
,(u'Health Science' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_health_212.xml' )
|
||||||
|
,(u'Music' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_music_449.xml' )
|
||||||
|
,(u'National News' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_national_209.xml' )
|
||||||
|
,(u'Sport News' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_sport_213.xml' )
|
||||||
|
,(u'AFL News' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_afl_205.xml' )
|
||||||
|
,(u'State News' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_vic_214.xml' )
|
||||||
|
,(u'Technology' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_tech_215.xml' )
|
||||||
|
,(u'World News' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_world_216.xml' )
|
||||||
|
,(u'Opinion', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_opinion_210.xml' )
|
||||||
|
,(u'Andrew Bolt' , u'http://blogs.news.com.au/heraldsun/andrewbolt/index.php/xml/rss_2.0/heraldsun/hs_andrewbolt/')
|
||||||
|
,(u'Afl - St Kilda' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_afl_stkilda_565.xml')
|
||||||
|
,(u'Terry McCrann' ,u'http://feeds.news.com.au/public/rss/2.0/heraldsun_tmccrann_224.xml' )
|
||||||
|
,(u'The Other side' ,u'http://feeds.news.com.au/public/rss/2.0/heraldsun_otherside_211.xml')]
|
||||||
|
|
||||||
|
def get_browser(self):
|
||||||
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
|
if self.username and self.password:
|
||||||
|
br.open('http://www.heraldsun.com.au')
|
||||||
|
br.select_form(nr=0)
|
||||||
|
br['username'] = self.username
|
||||||
|
br['password'] = self.password
|
||||||
|
raw = br.submit().read()
|
||||||
|
if '>log out' not in raw.lower():
|
||||||
|
raise ValueError('Failed to log in to www.heralsun'
|
||||||
|
' are your username and password correct?')
|
||||||
|
return br
|
||||||
|
|
||||||
|
def get_article_url(self, article):
|
||||||
|
return article.id
|
||||||
|
|
||||||
|
|
@ -3,25 +3,6 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
import re
|
import re
|
||||||
from calibre.utils.magick import Image
|
from calibre.utils.magick import Image
|
||||||
from BeautifulSoup import BeautifulSoup
|
from BeautifulSoup import BeautifulSoup
|
||||||
try:
|
|
||||||
from calibre_plugins.drMerry.debug import debuglogger as mlog
|
|
||||||
print 'drMerry debuglogger found, debug options can be used'
|
|
||||||
from calibre_plugins.drMerry.stats import statslogger as mstat
|
|
||||||
print 'drMerry stats tracker found, stat can be tracked'
|
|
||||||
mlog.setLoglevel(1) #-1 == no log; 0 for normal output
|
|
||||||
mstat.calculateStats(False) #track stats (to track stats loglevel must be > 0
|
|
||||||
KEEPSTATS = mstat.keepmystats()
|
|
||||||
SHOWDEBUG0 = mlog.showdebuglevel(0)
|
|
||||||
SHOWDEBUG1 = mlog.showdebuglevel(1)
|
|
||||||
SHOWDEBUG2 = mlog.showdebuglevel(2)
|
|
||||||
except:
|
|
||||||
#print 'drMerry debuglogger not found, skipping debug options'
|
|
||||||
SHOWDEBUG0 = False
|
|
||||||
SHOWDEBUG1 = False
|
|
||||||
SHOWDEBUG2 = False
|
|
||||||
KEEPSTATS = False
|
|
||||||
|
|
||||||
#print ('level0: %s\nlevel1: %s\nlevel2: %s' % (SHOWDEBUG0,SHOWDEBUG1,SHOWDEBUG2))
|
|
||||||
|
|
||||||
''' Version 1.2, updated cover image to match the changed website.
|
''' Version 1.2, updated cover image to match the changed website.
|
||||||
added info date on title
|
added info date on title
|
||||||
@ -43,6 +24,9 @@ except:
|
|||||||
extended timeout from 2 to 10
|
extended timeout from 2 to 10
|
||||||
changed oldest article from 10 to 1.2
|
changed oldest article from 10 to 1.2
|
||||||
changed max articles from 15 to 25
|
changed max articles from 15 to 25
|
||||||
|
Version 1.9.1 18-04-2012
|
||||||
|
removed some debug settings
|
||||||
|
updated code to match new metro-layout
|
||||||
'''
|
'''
|
||||||
|
|
||||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||||
@ -70,34 +54,40 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
|||||||
'author_sort' : 'Metro Nederland & calibre & DrMerry',
|
'author_sort' : 'Metro Nederland & calibre & DrMerry',
|
||||||
'publisher' : 'DrMerry/Metro Nederland'
|
'publisher' : 'DrMerry/Metro Nederland'
|
||||||
}
|
}
|
||||||
extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
|
extra_css = 'body {padding:5px 0; background-color:#fff;font-size: 1em}\
|
||||||
#date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {clear: both;margin-bottom: 10px;font-size:0.5em; color: #616262;}\
|
#date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {margin-bottom: 10px}\
|
||||||
.article-box-fact.module-title {clear:both;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
|
#date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name, p.article-image-caption .credits {font-size:0.5em}\
|
||||||
h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
|
.article-box-fact.module-title, #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {clear:both}\
|
||||||
.article-body p{padding-bottom:10px;}div.column-1-3{margin-left: 19px;padding-right: 9px;}\
|
.article-box-fact.module-title {padding: 8px 0}\
|
||||||
div.column-1-2 {display: inline;padding-right: 7px;}\
|
h1.title {color: #000;font-size: 1.4em}\
|
||||||
p.article-image-caption {font-size: 12px;font-weight: 300;color: #616262;margin-top: 5px;} \
|
.article-box-fact.module-title, h2.subtitle {font-size: 1.2em}\
|
||||||
p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
|
h1.title, h2.subtitle, .article-body p{padding-bottom:10px}\
|
||||||
div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
|
h1.title, p.article-image-caption {font-weight: 300}\
|
||||||
div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
|
div.column-1-3{margin-left: 19px;padding-right: 9px}\
|
||||||
img {border:0px; padding:2px;} hr.merryhr {width:30%; border-width:0px; color:green; margin-left:5px; background-color: green} div.column-3 {background-color:#eee; width:50%; margin:2px; float:right; padding:2px;} div.column-3 module-title {border: 1px solid #aaa} div.article-box-fact div.subtitle {font-weight:bold; color:green;}'
|
div.column-1-2 {display: inline;padding-right: 7px}\
|
||||||
|
p.article-image-caption {font-size: 0.6em;margin-top: 5px}\
|
||||||
|
p.article-image-caption, #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {color: #616262}\
|
||||||
|
p.article-image-caption .credits {font-style: italic}\
|
||||||
|
div.article-image-caption {width: 246px;margin: 5px}\
|
||||||
|
div.article-image-caption-2column {width: 373px}\
|
||||||
|
div.article-image-caption-2column, div.article-image-caption-3column {margin-bottom: 5px}\
|
||||||
|
img {border:0}\
|
||||||
|
img, div.column-3 {padding:2px}\
|
||||||
|
hr.merryhr {width:30%; border-width:0; margin-left:5px; background-color: #24763b}\
|
||||||
|
div.column-3 {background-color:#eee; width:50%; margin:2px; float:right}\
|
||||||
|
div.column-3 module-title {border: 1px solid #aaa}\
|
||||||
|
div.article-box-fact div.subtitle, .article-box-fact.module-title, h2.subtitle {font-weight:bold}\
|
||||||
|
div.article-box-fact div.subtitle, hr.merryhr, .article-box-fact.module-title {color: #24763b}'
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'<img[^>]+top-line[^>]+>', re.DOTALL|re.IGNORECASE),
|
(re.compile(r'<img[^>]+top-line[^>]+>', re.DOTALL|re.IGNORECASE),
|
||||||
lambda match: '<hr class="merryhr" />'),
|
lambda match: '<hr class="merryhr" />'),
|
||||||
(re.compile(r'(<img[^>]+metronieuws\.nl/[^>]+/templates/[^>]+jpe?g[^>]+>|metronieuws\.nl/internal\-roxen\-unit\.gif)', re.DOTALL|re.IGNORECASE),
|
(re.compile(r'<img[^>]+(metronieuws\.nl/[^>]+/templates/[^>]+jpe?g|metronieuws\.nl/internal\-roxen\-unit\.gif)[^>]+>', re.DOTALL|re.IGNORECASE),
|
||||||
lambda match: ''),
|
lambda match: ''),
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
if SHOWDEBUG0 == True:
|
|
||||||
mlog.setdefaults()
|
|
||||||
mlog.addTextAndTag(['Show debug = on with level'], [str(mlog.debuglevel)])
|
|
||||||
if KEEPSTATS == True:
|
|
||||||
mlog.addDebug('Stats will be calculated')
|
|
||||||
else:
|
|
||||||
mlog.addTextAndTag(['Stats won\'t be calculated\nTo be enabled, stats must be true, currently','and debug level must be 1 or higher, currently'],[mstat.dokeepmystats, mlog.debuglevel])
|
|
||||||
mlog.showDebug()
|
|
||||||
myProcess = MerryProcess()
|
myProcess = MerryProcess()
|
||||||
myProcess.removeUnwantedTags(soup)
|
myProcess.removeUnwantedTags(soup)
|
||||||
return soup
|
return soup
|
||||||
@ -105,18 +95,6 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
|||||||
def postprocess_html(self, soup, first):
|
def postprocess_html(self, soup, first):
|
||||||
myProcess = MerryProcess()
|
myProcess = MerryProcess()
|
||||||
myProcess.optimizeLayout(soup)
|
myProcess.optimizeLayout(soup)
|
||||||
if SHOWDEBUG0 == True:
|
|
||||||
if KEEPSTATS == True:
|
|
||||||
statinfo = 'generated stats:'
|
|
||||||
statinfo += str(mstat.stats(mstat.statslist))
|
|
||||||
print statinfo
|
|
||||||
statinfo = 'generated stats (for removed tags):'
|
|
||||||
statinfo += str(mstat.stats(mstat.removedtagslist))
|
|
||||||
print statinfo
|
|
||||||
#show all Debug info we forgot to report
|
|
||||||
#Using print to be sure that this text will not be added at the end of the log.
|
|
||||||
print '\n!!!!!unreported messages:\n(should be empty)\n'
|
|
||||||
mlog.showDebug()
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
@ -142,44 +120,24 @@ class MerryPreProcess():
|
|||||||
return soup
|
return soup
|
||||||
|
|
||||||
def optimizePicture(self,soup):
|
def optimizePicture(self,soup):
|
||||||
if SHOWDEBUG0 == True:
|
|
||||||
mlog.addDebug('start image optimize')
|
|
||||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
||||||
iurl = tag['src']
|
iurl = tag['src']
|
||||||
img = Image()
|
img = Image()
|
||||||
img.open(iurl)
|
img.open(iurl)
|
||||||
img.trim(0)
|
img.trim(0)
|
||||||
img.save(iurl)
|
img.save(iurl)
|
||||||
if SHOWDEBUG0 == True:
|
|
||||||
mlog.addDebug('Images optimized')
|
|
||||||
mlog.showDebug()
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
class MerryExtract():
|
class MerryExtract():
|
||||||
def safeRemovePart(self, killingSoup, soupIsArray):
|
def safeRemovePart(self, killingSoup, soupIsArray):
|
||||||
if killingSoup and not killingSoup == None:
|
if killingSoup and not killingSoup == None:
|
||||||
if SHOWDEBUG2 == True:
|
|
||||||
mlog.addTextAndTag(['items to remove'],[killingSoup])
|
|
||||||
try:
|
try:
|
||||||
if soupIsArray == True:
|
if soupIsArray == True:
|
||||||
for killer in killingSoup:
|
for killer in killingSoup:
|
||||||
killer.extract()
|
killer.extract()
|
||||||
else:
|
else:
|
||||||
killingSoup.extract()
|
killingSoup.extract()
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('tag extracted')
|
|
||||||
mlog.showDebug()
|
|
||||||
if KEEPSTATS == True:
|
|
||||||
try:
|
|
||||||
mstat.addstat(mstat.removedtagslist,str(killingSoup.name))
|
|
||||||
except:
|
|
||||||
mstat.addstat(mstat.removedtagslist,'unknown')
|
|
||||||
except:
|
except:
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('tag extraction failed')
|
|
||||||
mlog.showDebug()
|
|
||||||
if KEEPSTATS == True:
|
|
||||||
mstat.addstat(mstat.removedtagslist,'exception')
|
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
@ -230,60 +188,26 @@ class MerryProcess(BeautifulSoup):
|
|||||||
|
|
||||||
def optimizeLayout(self,soup):
|
def optimizeLayout(self,soup):
|
||||||
self.myPrepare.optimizePicture(soup)
|
self.myPrepare.optimizePicture(soup)
|
||||||
if SHOWDEBUG0 == True:
|
|
||||||
mlog.addDebug('End of Optimize Layout')
|
|
||||||
mlog.showDebug()
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def insertFacts(self, soup):
|
def insertFacts(self, soup):
|
||||||
allfacts = soup.findAll('div', {'class':re.compile('^article-box-fact.*$')})
|
allfacts = soup.findAll('div', {'class':re.compile('^article-box-fact.*$')})
|
||||||
if SHOWDEBUG0 == True:
|
|
||||||
mlog.addTextAndTag(['allfacts'],[allfacts])
|
|
||||||
mlog.showDebug()
|
|
||||||
if allfacts and not allfacts == None:
|
if allfacts and not allfacts == None:
|
||||||
allfactsparent = soup.find('div', {'class':re.compile('^article-box-fact.*$')}).parent
|
allfactsparent = soup.find('div', {'class':re.compile('^article-box-fact.*$')}).parent
|
||||||
if SHOWDEBUG0 == True:
|
|
||||||
mlog.addTextAndTag(['allfactsparent'],[allfactsparent])
|
|
||||||
mlog.showDebug()
|
|
||||||
for part in allfactsparent:
|
for part in allfactsparent:
|
||||||
if not part in allfacts:
|
if not part in allfacts:
|
||||||
if SHOWDEBUG0 == True:
|
|
||||||
mlog.addTextAndTag(['FOUND A non-fact'],[part])
|
|
||||||
mlog.showDebug()
|
|
||||||
self.myKiller.safeRemovePart(part, True)
|
self.myKiller.safeRemovePart(part, True)
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addTextAndTag(['New All Facts'],[allfacts])
|
|
||||||
mlog.showDebug()
|
|
||||||
articlefacts = soup.find('div', {'class':'article-box-fact column'})
|
articlefacts = soup.find('div', {'class':'article-box-fact column'})
|
||||||
errorOccured=False
|
|
||||||
if (articlefacts and not articlefacts==None):
|
if (articlefacts and not articlefacts==None):
|
||||||
try:
|
try:
|
||||||
contenttag = soup.find('div', {'class':'article-body'})
|
contenttag = soup.find('div', {'class':'article-body'})
|
||||||
if SHOWDEBUG0 == True:
|
|
||||||
mlog.addTextAndTag(['curcontag'],[contenttag])
|
|
||||||
mlog.showDebug()
|
|
||||||
foundrighttag = False
|
foundrighttag = False
|
||||||
if contenttag and not contenttag == None:
|
if contenttag and not contenttag == None:
|
||||||
foundrighttag = True
|
foundrighttag = True
|
||||||
if SHOWDEBUG0 == True:
|
|
||||||
if errorOccured == False:
|
|
||||||
mlog.addTextAndTag(['type','curcontag (in while)'],[type(contenttag),contenttag])
|
|
||||||
else:
|
|
||||||
mlog.addDebug('Could not find right parent tag. Error Occured')
|
|
||||||
mlog.showDebug()
|
|
||||||
if foundrighttag == True:
|
if foundrighttag == True:
|
||||||
contenttag.insert(0, allfactsparent)
|
contenttag.insert(0, allfactsparent)
|
||||||
if SHOWDEBUG2 == True:
|
|
||||||
mlog.addTextAndTag(['added parent'],[soup.prettify()])
|
|
||||||
mlog.showDebug()
|
|
||||||
except:
|
except:
|
||||||
errorOccured=True
|
pass
|
||||||
mlog.addTrace()
|
|
||||||
else:
|
|
||||||
errorOccured=True
|
|
||||||
if SHOWDEBUG0 == True and errorOccured == True:
|
|
||||||
mlog.addTextAndTag(['no articlefacts'],[articlefacts])
|
|
||||||
mlog.showDebug()
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def previousNextSibRemover(self, soup, previous=True, soupIsArray=False):
|
def previousNextSibRemover(self, soup, previous=True, soupIsArray=False):
|
||||||
@ -300,71 +224,38 @@ class MerryProcess(BeautifulSoup):
|
|||||||
sibs = findsibsof.nextSiblingGenerator()
|
sibs = findsibsof.nextSiblingGenerator()
|
||||||
for sib in sibs:
|
for sib in sibs:
|
||||||
self.myKiller.safeRemovePart(sib, True)
|
self.myKiller.safeRemovePart(sib, True)
|
||||||
else:
|
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('Not any sib found')
|
|
||||||
return
|
return
|
||||||
|
|
||||||
def removeUnwantedTags(self,soup):
|
def removeUnwantedTags(self,soup):
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addTextAndTag(['Len of Soup before RemoveTagsByName'],[len(str(soup))])
|
|
||||||
mlog.showDebug()
|
|
||||||
self.removeTagsByName(soup)
|
self.removeTagsByName(soup)
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('Len of Soup before firstandlastpart: %s' % len(str(soup)))
|
|
||||||
mlog.showDebug()
|
|
||||||
self.insertFacts(soup)
|
self.insertFacts(soup)
|
||||||
self.removeFirstAndLastPart(soup)
|
self.removeFirstAndLastPart(soup)
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('Len of Soup before unwantedpart: %s' % len(str(soup)))
|
|
||||||
mlog.showDebug()
|
|
||||||
self.removeUnwantedParts(soup)
|
self.removeUnwantedParts(soup)
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('Len of Soup before EmptyParts: %s' % len(str(soup)))
|
|
||||||
mlog.showDebug()
|
|
||||||
self.removeEmptyTags(soup)
|
self.removeEmptyTags(soup)
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('Len of Soup after EmptyParts: %s' % len(str(soup)))
|
|
||||||
mlog.showDebug()
|
|
||||||
self.myReplacer.replaceATag(soup)
|
self.myReplacer.replaceATag(soup)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def removeUnwantedParts(self, soup):
|
def removeUnwantedParts(self, soup):
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('Len of Soup before UnwantedID: %s' % len(str(soup)))
|
|
||||||
mlog.showDebug()
|
|
||||||
self.removeUnwantedTagsByID(soup)
|
self.removeUnwantedTagsByID(soup)
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('Len of Soup before Class: %s' % len(str(soup)))
|
|
||||||
mlog.showDebug()
|
|
||||||
self.removeUnwantedTagsByClass(soup)
|
self.removeUnwantedTagsByClass(soup)
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('Len of Soup before Style: %s' % len(str(soup)))
|
|
||||||
mlog.showDebug()
|
|
||||||
self.removeUnwantedTagsByStyle(soup)
|
self.removeUnwantedTagsByStyle(soup)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def removeUnwantedTagsByStyle(self,soup):
|
def removeUnwantedTagsByStyle(self,soup):
|
||||||
self.removeArrayOfTags(soup.findAll(attrs={'style' : re.compile("^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$")}))
|
self.removeArrayOfTags(soup.findAll(attrs={'style':re.compile("^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$")}))
|
||||||
if SHOWDEBUG0 == True:
|
self.removeArrayOfTags(soup.findAll(attrs={'title':'volledig scherm'}))
|
||||||
mlog.addDebug('end remove by style')
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def removeArrayOfTags(self,souparray):
|
def removeArrayOfTags(self,souparray):
|
||||||
return self.myKiller.safeRemovePart(souparray, True)
|
return self.myKiller.safeRemovePart(souparray, True)
|
||||||
|
|
||||||
def removeUnwantedTagsByClass(self,soup):
|
def removeUnwantedTagsByClass(self,soup):
|
||||||
if SHOWDEBUG0 == True:
|
self.removeArrayOfTags(soup.findAll("div", { "class" :re.compile('^(promo.*?|share-tools-top|share-tools-bottom|article-tools-below-title|metroCommentFormWrap|ad|share-tools|tools|header-links|related-links|padding-top-15|footer-[a-zA-Z0-9]+)$')}))
|
||||||
mlog.addDebug('start remove by class')
|
|
||||||
self.removeArrayOfTags(soup.findAll("div", { "class" :re.compile('^(promo.*?|article-tools-below-title|metroCommentFormWrap|ad|share-tools|tools|header-links|related-links|padding-top-15)$')}))
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def removeUnwantedTagsByID(self,soup):
|
def removeUnwantedTagsByID(self,soup):
|
||||||
defaultids = ['footer-extra',re.compile('^ad(\d+|adcomp.*?)?$'),'column-4-5','navigation','header',re.compile('^column-1-5-(top|bottom)$'),'footer','hidden_div','sidebar',re.compile('^article-\d$'),'comments','footer']
|
defaultids = ['footer-extra',re.compile('^ad(\d+|adcomp.*?)?$'),'column-4-5','navigation','header',re.compile('^column-1-5-(top|bottom)$'),'footer','hidden_div','sidebar',re.compile('^article-\d$'),'comments','footer','gallery-1']
|
||||||
for removeid in defaultids:
|
for removeid in defaultids:
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('RemoveTagByID, tag: %s, Len of Soup: %s' % (str(removeid), len(str(soup))))
|
|
||||||
mlog.showDebug()
|
|
||||||
self.removeArrayOfTags(soup.findAll(id=removeid))
|
self.removeArrayOfTags(soup.findAll(id=removeid))
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
@ -380,33 +271,12 @@ class MerryProcess(BeautifulSoup):
|
|||||||
return soup
|
return soup
|
||||||
|
|
||||||
def removeEmptyTags(self,soup,run=0):
|
def removeEmptyTags(self,soup,run=0):
|
||||||
if SHOWDEBUG0 == True:
|
|
||||||
mlog.addDebug('starting removeEmptyTags')
|
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
run += 1
|
|
||||||
mlog.addDebug(run)
|
|
||||||
if SHOWDEBUG2 == True:
|
|
||||||
mlog.addDebug(str(soup.prettify()))
|
|
||||||
mlog.showDebug()
|
|
||||||
emptymatches = re.compile('^( |\s|\n|\r|\t)*$')
|
emptymatches = re.compile('^( |\s|\n|\r|\t)*$')
|
||||||
emptytags = soup.findAll(lambda tag: tag.find(True) is None and (tag.string is None or tag.string.strip()=="" or tag.string.strip()==emptymatches) and not tag.isSelfClosing)
|
emptytags = soup.findAll(lambda tag: tag.find(True) is None and (tag.string is None or tag.string.strip()=="" or tag.string.strip()==emptymatches) and not tag.isSelfClosing)
|
||||||
if emptytags and not (emptytags == None or emptytags == []):
|
if emptytags and not (emptytags == None or emptytags == []):
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('tags found')
|
|
||||||
mlog.addDebug(str(emptytags))
|
|
||||||
self.removeArrayOfTags(emptytags)
|
self.removeArrayOfTags(emptytags)
|
||||||
#recursive in case removing empty tag creates new empty tag
|
#recursive in case removing empty tag creates new empty tag
|
||||||
self.removeEmptyTags(soup, run=run)
|
self.removeEmptyTags(soup, run=run)
|
||||||
else:
|
|
||||||
if SHOWDEBUG1 == True:
|
|
||||||
mlog.addDebug('no empty tags found')
|
|
||||||
mlog.showDebug()
|
|
||||||
if SHOWDEBUG0 == True:
|
|
||||||
if SHOWDEBUG2 == True:
|
|
||||||
mlog.addDebug('new soup:')
|
|
||||||
mlog.addDebug(str(soup.prettify()))
|
|
||||||
mlog.addDebug('RemoveEmptyTags Completed')
|
|
||||||
mlog.showDebug()
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def removeFirstAndLastPart(self,soup):
|
def removeFirstAndLastPart(self,soup):
|
||||||
|
@ -1,52 +1,30 @@
|
|||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||||
title = u'Metro UK'
|
title = u'Metro UK'
|
||||||
description = 'News as provide by The Metro -UK'
|
description = 'News as provide by The Metro -UK'
|
||||||
|
#timefmt = ''
|
||||||
__author__ = 'Dave Asbury'
|
__author__ = 'Dave Asbury'
|
||||||
#last update 3/12/11
|
|
||||||
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
|
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
|
||||||
no_stylesheets = True
|
#no_stylesheets = True
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 20
|
max_articles_per_feed = 10
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
auto_cleanup = True
|
||||||
|
|
||||||
#preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
|
|
||||||
preprocess_regexps = [
|
|
||||||
(re.compile(r'<span class="img-cap legend">', re.IGNORECASE | re.DOTALL), lambda match: '<p></p><span class="img-cap legend"> ')]
|
|
||||||
preprocess_regexps = [
|
|
||||||
(re.compile(r'tweet', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
|
||||||
|
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
|
|
||||||
|
|
||||||
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
|
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
|
|
||||||
dict(attrs={'class':['img-cnt figure']}),
|
|
||||||
dict(attrs={'class':['art-img']}),
|
|
||||||
dict(name='div', attrs={'class':'art-lft'}),
|
|
||||||
dict(name='p')
|
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name = 'div',attrs={'id' : ['comments-news','formSubmission']}),
|
|
||||||
dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
|
|
||||||
'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r','username','clrd' ]}),
|
|
||||||
dict(attrs={'class':['username', 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime','addYourComment','displayName']})
|
|
||||||
,dict(name='div', attrs={'class' : 'clrd art-fd fd-gr1-b'})
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
|
(u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
body {font: sans-serif medium;}'
|
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
||||||
h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
|
'''
|
||||||
h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
|
|
||||||
span{ font-size:9.5px; font-weight:bold;font-style:italic}
|
|
||||||
p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
|
||||||
|
|
||||||
'''
|
|
||||||
|
@ -7,12 +7,12 @@ class naczytniki(BasicNewsRecipe):
|
|||||||
cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
|
cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
description ='everything about e-readers'
|
description ='everything about e-readers'
|
||||||
category='readers'
|
category='e-readers'
|
||||||
no_stylesheets=True
|
no_stylesheets=True
|
||||||
|
use_embedded_content=False
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
|
preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
|
||||||
remove_tags_after= dict(name='div', attrs={'class':'sociable'})
|
|
||||||
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
|
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
|
||||||
remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
|
remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
|
||||||
feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]
|
feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]
|
@ -9,8 +9,9 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
|||||||
class recipeMagic(BasicNewsRecipe):
|
class recipeMagic(BasicNewsRecipe):
|
||||||
title = 'National Geographic PL'
|
title = 'National Geographic PL'
|
||||||
__author__ = 'Marcin Urban 2011'
|
__author__ = 'Marcin Urban 2011'
|
||||||
|
__modified_by__ = 'fenuks'
|
||||||
description = 'legenda wśród magazynów z historią sięgającą 120 lat'
|
description = 'legenda wśród magazynów z historią sięgającą 120 lat'
|
||||||
cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
|
#cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -42,11 +43,43 @@ class recipeMagic(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
remove_attributes = ['width','height']
|
remove_attributes = ['width','height']
|
||||||
|
feeds=[]
|
||||||
|
|
||||||
feeds = [
|
def find_articles(self, url):
|
||||||
('National Geographic PL', 'http://www.national-geographic.pl/rss/'),
|
articles = []
|
||||||
]
|
soup=self.index_to_soup(url)
|
||||||
|
tag=soup.find(attrs={'class':'arl'})
|
||||||
|
art=tag.ul.findAll('li')
|
||||||
|
for i in art:
|
||||||
|
title=i.a['title']
|
||||||
|
url=i.a['href']
|
||||||
|
#date=soup.find(id='footer').ul.li.string[41:-1]
|
||||||
|
desc=i.div.p.string
|
||||||
|
articles.append({'title' : title,
|
||||||
|
'url' : url,
|
||||||
|
'date' : '',
|
||||||
|
'description' : desc
|
||||||
|
})
|
||||||
|
return articles
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
feeds = []
|
||||||
|
feeds.append((u"Aktualności", self.find_articles('http://www.national-geographic.pl/aktualnosci/')))
|
||||||
|
feeds.append((u"Artykuły", self.find_articles('http://www.national-geographic.pl/artykuly/')))
|
||||||
|
|
||||||
|
return feeds
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('artykuly0Cpokaz', 'drukuj-artykul')
|
if 'artykuly' in url:
|
||||||
|
return url.replace('artykuly/pokaz', 'drukuj-artykul')
|
||||||
|
elif 'aktualnosci' in url:
|
||||||
|
return url.replace('aktualnosci/pokaz', 'drukuj-artykul')
|
||||||
|
else:
|
||||||
|
return url
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
soup = self.index_to_soup('http://www.national-geographic.pl/biezace-wydania/')
|
||||||
|
tag=soup.find(attrs={'class':'txt jus'})
|
||||||
|
self.cover_url=tag.img['src']
|
||||||
|
return getattr(self, 'cover_url', self.cover_url)
|
||||||
|
|
||||||
|
@ -81,5 +81,7 @@ class Nowa_Fantastyka(BasicNewsRecipe):
|
|||||||
title=soup.find(attrs={'class':'tytul'})
|
title=soup.find(attrs={'class':'tytul'})
|
||||||
if title:
|
if title:
|
||||||
title['style']='font-size: 20px; font-weight: bold;'
|
title['style']='font-size: 20px; font-weight: bold;'
|
||||||
self.log.warn(soup)
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.INDEX + a['href']
|
||||||
return soup
|
return soup
|
||||||
|
76
recipes/nrc_handelsblad.recipe
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012'
|
||||||
|
'''
|
||||||
|
nrc.nl
|
||||||
|
'''
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
class NRC(BasicNewsRecipe):
|
||||||
|
title = 'NRC Handelsblad'
|
||||||
|
__author__ = 'veezh'
|
||||||
|
description = 'Nieuws (no subscription needed)'
|
||||||
|
oldest_article = 1
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
#delay = 1
|
||||||
|
use_embedded_content = False
|
||||||
|
encoding = 'utf-8'
|
||||||
|
publisher = 'nrc.nl'
|
||||||
|
category = 'news, Netherlands, world'
|
||||||
|
language = 'nl'
|
||||||
|
timefmt = ''
|
||||||
|
#publication_type = 'newsportal'
|
||||||
|
extra_css = '''
|
||||||
|
h1{font-size:130%;}
|
||||||
|
#h2{font-size:100%;font-weight:normal;}
|
||||||
|
#.href{font-size:xx-small;}
|
||||||
|
.bijschrift{color:#666666; font-size:x-small;}
|
||||||
|
#.main-article-info{font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
#full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
|
||||||
|
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
|
||||||
|
'''
|
||||||
|
#preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
||||||
|
conversion_options = {
|
||||||
|
'comments' : description
|
||||||
|
,'tags' : category
|
||||||
|
,'language' : language
|
||||||
|
,'publisher' : publisher
|
||||||
|
,'linearize_tables': True
|
||||||
|
}
|
||||||
|
|
||||||
|
remove_empty_feeds = True
|
||||||
|
|
||||||
|
filterDuplicates = True
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for alink in soup.findAll('a'):
|
||||||
|
if alink.string is not None:
|
||||||
|
tstr = alink.string
|
||||||
|
alink.replaceWith(tstr)
|
||||||
|
return soup
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'class':'article'})]
|
||||||
|
remove_tags_after = [dict(id='broodtekst')]
|
||||||
|
|
||||||
|
# keep_only_tags = [
|
||||||
|
# dict(name='div', attrs={'class':['label']})
|
||||||
|
# ]
|
||||||
|
|
||||||
|
# remove_tags_after = [dict(name='dl', attrs={'class':['tags']})]
|
||||||
|
|
||||||
|
# def get_article_url(self, article):
|
||||||
|
# link = article.get('link')
|
||||||
|
# if 'blog' not in link and ('chat' not in link):
|
||||||
|
# return link
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
# ('Nieuws', 'http://www.nrc.nl/rss.php'),
|
||||||
|
('Binnenland', 'http://www.nrc.nl/nieuws/categorie/binnenland/rss.php'),
|
||||||
|
('Buitenland', 'http://www.nrc.nl/nieuws/categorie/buitenland/rss.php'),
|
||||||
|
('Economie', 'http://www.nrc.nl/nieuws/categorie/economie/rss.php'),
|
||||||
|
('Wetenschap', 'http://www.nrc.nl/nieuws/categorie/wetenschap/rss.php'),
|
||||||
|
('Cultuur', 'http://www.nrc.nl/nieuws/categorie/cultuur/rss.php'),
|
||||||
|
('Boeken', 'http://www.nrc.nl/boeken/rss.php'),
|
||||||
|
('Tech', 'http://www.nrc.nl/tech/rss.php/'),
|
||||||
|
('Klimaat', 'http://www.nrc.nl/klimaat/rss.php/'),
|
||||||
|
]
|
@ -1,45 +1,69 @@
|
|||||||
# Talking Points is not grabbing everything.
|
|
||||||
# The look is right, but only the last one added?
|
|
||||||
import re
|
|
||||||
import time
|
import time
|
||||||
|
import traceback
|
||||||
|
# above for debugging via stack
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
# Allows the Python soup converter, which makes parsing easier.
|
# Allows the Python soup converter, which makes parsing easier.
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
# strip ads and graphics
|
|
||||||
# Current Column lacks a title.
|
import os
|
||||||
# Talking Points Memo - shorten title - Remove year and Bill's name
|
|
||||||
|
|
||||||
|
from calibre.web.feeds import feeds_from_index
|
||||||
|
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
|
||||||
|
|
||||||
|
|
||||||
|
# To Do: strip ads and graphics, Current Column lacks a title.
|
||||||
# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
|
# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
|
||||||
# Newsletters: Talking Points Memos covered by cat12
|
# Newsletters: Talking Points Memos covered by cat12
|
||||||
|
# ./ebook-convert --username xxx --password xxx
|
||||||
|
|
||||||
|
# this is derived from BasicNewsRecipe, so it can only overload those.
|
||||||
|
# Soome of what we need is otherwise in article, so we have more copy to do than otherwise.
|
||||||
class OReillyPremium(BasicNewsRecipe):
|
class OReillyPremium(BasicNewsRecipe):
|
||||||
title = u'OReilly Premium'
|
title = u'OReilly Premium'
|
||||||
__author__ = 'TMcN'
|
__author__ = 'TMcN'
|
||||||
language = 'en'
|
|
||||||
description = 'Retrieves Premium and News Letter content from BillOReilly.com. Requires a Bill OReilly Premium Membership.'
|
description = 'Retrieves Premium and News Letter content from BillOReilly.com. Requires a Bill OReilly Premium Membership.'
|
||||||
cover_url = 'http://images.billoreilly.com/images/headers/billgray_header.png'
|
cover_url = 'http://images.billoreilly.com/images/headers/billgray_header.png'
|
||||||
|
custom_title = 'Bill O\'Reilly Premium - '+ time.strftime('%d %b %Y')
|
||||||
|
title = 'Bill O\'Reilly Premium'
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
conversion_options = {'linearize_tables': True}
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
needs_subscription = True
|
language = 'en'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
oldest_article = 20
|
needs_subscription = True
|
||||||
|
oldest_article = 31
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_tags = [dict(name='img', attrs={})]
|
remove_tags = [dict(name='img', attrs={})]
|
||||||
# Don't go down
|
# Don't go down
|
||||||
recursions = 0
|
recursions = 0
|
||||||
max_articles_per_feed = 2000
|
max_articles_per_feed = 20
|
||||||
|
|
||||||
debugMessages = True
|
debugMessages = True
|
||||||
|
|
||||||
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
|
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
|
||||||
catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []],
|
catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []],
|
||||||
["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
|
# ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
|
||||||
["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
|
# ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
|
||||||
["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
|
# ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
|
||||||
["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
|
# ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
|
||||||
["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []]
|
["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'No Spin', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=7'),
|
||||||
|
(u'Daily Briefing', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=11'),
|
||||||
|
(u'Talking Points', u'https://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=12'),
|
||||||
|
(u'Blog', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=0'),
|
||||||
|
(u'StratFor', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=5')
|
||||||
|
]
|
||||||
|
# http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=8 is word for the day.
|
||||||
|
|
||||||
|
# Note: Talking Points is broken in the above model; the site changed to more Ajax-y.
|
||||||
|
# Now using RSS
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
|
print("In get_browser")
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open('https://www.billoreilly.com/pg/jsp/member/membersignin.jsp')
|
br.open('https://www.billoreilly.com/pg/jsp/member/membersignin.jsp')
|
||||||
@ -66,6 +90,7 @@ class OReillyPremium(BasicNewsRecipe):
|
|||||||
def stripBadChars(self, inString) :
|
def stripBadChars(self, inString) :
|
||||||
return inString.replace("\'", "")
|
return inString.replace("\'", "")
|
||||||
|
|
||||||
|
|
||||||
def parseGeneric(self, baseURL):
|
def parseGeneric(self, baseURL):
|
||||||
# Does a generic parsing of the articles. There are six categories (0-5)
|
# Does a generic parsing of the articles. There are six categories (0-5)
|
||||||
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
|
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
|
||||||
@ -73,6 +98,7 @@ class OReillyPremium(BasicNewsRecipe):
|
|||||||
fullReturn = []
|
fullReturn = []
|
||||||
for i in range(len(self.catList)) :
|
for i in range(len(self.catList)) :
|
||||||
articleList = []
|
articleList = []
|
||||||
|
print("In "+self.catList[i][0]+", index: "+ str(i))
|
||||||
soup = self.index_to_soup(self.catList[i][1])
|
soup = self.index_to_soup(self.catList[i][1])
|
||||||
# Set defaults
|
# Set defaults
|
||||||
description = 'None'
|
description = 'None'
|
||||||
@ -81,14 +107,12 @@ class OReillyPremium(BasicNewsRecipe):
|
|||||||
# 3-5 create one.
|
# 3-5 create one.
|
||||||
# So no for-div for 3-5
|
# So no for-div for 3-5
|
||||||
|
|
||||||
if i < 3 :
|
if i == 0 :
|
||||||
|
print("Starting TV Archives")
|
||||||
for div in soup.findAll(self.catList[i][2], self.catList[i][3]):
|
for div in soup.findAll(self.catList[i][2], self.catList[i][3]):
|
||||||
|
print("Next DIV:")
|
||||||
print(div)
|
print(div)
|
||||||
if i == 1:
|
a = div
|
||||||
a = div.find('a', href=True)
|
|
||||||
else :
|
|
||||||
a = div
|
|
||||||
print(a)
|
|
||||||
summary = div.find(True, attrs={'class':'summary'})
|
summary = div.find(True, attrs={'class':'summary'})
|
||||||
if summary:
|
if summary:
|
||||||
description = self.tag_to_string(summary, use_alt=False)
|
description = self.tag_to_string(summary, use_alt=False)
|
||||||
@ -96,82 +120,63 @@ class OReillyPremium(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
# url = baseURL+re.sub(r'\?.*', '', a['href'])
|
# url = baseURL+re.sub(r'\?.*', '', a['href'])
|
||||||
url = baseURL+a['href']
|
url = baseURL+a['href']
|
||||||
if i < 2 :
|
url = self.extractPrintURL(baseURL, url, "Print this entry")
|
||||||
url = self.extractPrintURL(baseURL, url, "Print this entry")
|
title = self.tag_to_string(a, use_alt=True).strip()
|
||||||
title = self.tag_to_string(a, use_alt=True).strip()
|
|
||||||
elif i == 2 :
|
|
||||||
# Daily Briefs
|
|
||||||
url = self.extractPrintURL(baseURL, url, "Print this entry")
|
|
||||||
title = div.contents[0]
|
|
||||||
if self.debugMessages :
|
|
||||||
print(title+" @ "+url)
|
|
||||||
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
||||||
|
|
||||||
elif i == 3 : # Stratfor
|
|
||||||
a = soup.find('a', self.catList[i][3])
|
|
||||||
if a is None :
|
|
||||||
continue
|
|
||||||
url = baseURL+a['href']
|
|
||||||
title = self.tag_to_string(a, use_alt=True).strip()
|
|
||||||
# Get Stratfor contents so we can get the real title.
|
|
||||||
stratSoup = self.index_to_soup(url)
|
|
||||||
title = stratSoup.html.head.title.string
|
|
||||||
stratIndex = title.find('Stratfor.com:', 0)
|
|
||||||
if (stratIndex > -1) :
|
|
||||||
title = title[stratIndex+14:-1]
|
|
||||||
# Look for first blogBody <td class="blogBody"
|
|
||||||
# Changed 12 Jan 2012 - new page format
|
|
||||||
#stratBlogTable = stratSoup.find('td', {'class':['blogBody']}).findParent('table')
|
|
||||||
#stratBody = stratSoup.find('td', {'class':['blogBody']})
|
|
||||||
elif i == 4 : # Talking Points
|
|
||||||
topDate = soup.find("td", "blogBody")
|
|
||||||
if not topDate :
|
|
||||||
print("Failed to find date in Talking Points")
|
|
||||||
# This page has the contents in double-wrapped tables!
|
|
||||||
myTable = topDate.findParents('table')[0]
|
|
||||||
if myTable is not None:
|
|
||||||
upOneTable = myTable.findParents('table')[0]
|
|
||||||
if upOneTable is not None:
|
|
||||||
upTwo = upOneTable.findParents('table')[0]
|
|
||||||
if upTwo is None:
|
|
||||||
continue
|
|
||||||
# Now navigate rows of upTwo
|
|
||||||
if self.debugMessages :
|
|
||||||
print("Entering rows")
|
|
||||||
for rows in upTwo.findChildren("tr", recursive=False):
|
|
||||||
# Inside top level table, each row is an article
|
|
||||||
rowTable = rows.find("table")
|
|
||||||
articleTable = rowTable.find("table")
|
|
||||||
# This looks wrong.
|
|
||||||
articleTable = rows.find("tr")
|
|
||||||
# The middle table is just for formatting the article buffer... but this means we can skip the inner table.
|
|
||||||
blogDate = articleTable.find("a","blogDate").contents[0]
|
|
||||||
# Skip to second blogBody for this.
|
|
||||||
blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
|
|
||||||
blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
|
|
||||||
url = baseURL+re.sub(r'\?.*', '', blogURL)
|
|
||||||
title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
|
|
||||||
if self.debugMessages :
|
|
||||||
print("Talking Points Memo title "+title+" at url: "+url)
|
|
||||||
pubdate = time.strftime('%a, %d %b')
|
|
||||||
articleList.append(dict(title=title, url=url, date=pubdate, description='None', content=''))
|
|
||||||
else : # Current Column
|
else : # Current Column
|
||||||
titleSpan = soup.find(self.catList[i][2], self.catList[i][3])
|
titleSpan = soup.find(self.catList[i][2], self.catList[i][3])
|
||||||
if titleSpan is None :
|
if titleSpan is None :
|
||||||
|
print("No Current Column Title Span")
|
||||||
|
print(soup)
|
||||||
continue
|
continue
|
||||||
title = titleSpan.contents[0]
|
title = titleSpan.contents[0]
|
||||||
url = self.extractPrintURL(baseURL, self.catList[i][1], "Print This Article")
|
url = self.extractPrintURL(baseURL, self.catList[i][1], "Print This Article")
|
||||||
if i == 3 or i == 5 :
|
if i == 1 :
|
||||||
if self.debugMessages :
|
if self.debugMessages :
|
||||||
print(self.catList[i][0]+" Title:"+title+" at url: "+url)
|
print(self.catList[i][0]+" Title:"+title+" at url: "+url)
|
||||||
summary = div.find(True, attrs={'class':'summary'})
|
summary = div.find(True, attrs={'class':'summary'})
|
||||||
if summary:
|
print("At Summary")
|
||||||
|
print(summary)
|
||||||
|
if summary is not None:
|
||||||
description = self.tag_to_string(summary, use_alt=False)
|
description = self.tag_to_string(summary, use_alt=False)
|
||||||
|
print("At append")
|
||||||
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
||||||
self.catList[i][3] = articleList
|
self.catList[i][3] = articleList
|
||||||
fullReturn.append((self.catList[i][0], articleList))
|
fullReturn.append((self.catList[i][0], articleList))
|
||||||
|
print("Returning")
|
||||||
|
# print fullReturn
|
||||||
return fullReturn
|
return fullReturn
|
||||||
|
|
||||||
|
|
||||||
|
# build_index() starts with:
|
||||||
|
# try:
|
||||||
|
# feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
|
||||||
|
# max_articles_per_feed=self.max_articles_per_feed,
|
||||||
|
# log=self.log)
|
||||||
|
# self.report_progress(0, _('Got feeds from index page'))
|
||||||
|
# except NotImplementedError:
|
||||||
|
# feeds = self.parse_feeds()
|
||||||
|
|
||||||
|
# which in turn is from __init__.py
|
||||||
|
#def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100,
|
||||||
|
# log=default_log):
|
||||||
|
#'''
|
||||||
|
#@param index: A parsed index as returned by L{BasicNewsRecipe.parse_index}.
|
||||||
|
#@return: A list of L{Feed} objects.
|
||||||
|
#@rtype: list
|
||||||
|
#'''
|
||||||
|
#feeds = []
|
||||||
|
#for title, articles in index:
|
||||||
|
# pfeed = Feed(log=log)
|
||||||
|
# pfeed.populate_from_preparsed_feed(title, articles, oldest_article=oldest_article,
|
||||||
|
# max_articles_per_feed=max_articles_per_feed)
|
||||||
|
# feeds.append(pfeed)
|
||||||
|
# return feeds
|
||||||
|
|
||||||
|
# use_embedded_content defaults to None, at which point if the content is > 2K, it is used as the article.
|
||||||
|
|
||||||
|
|
||||||
# calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
|
# calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
|
||||||
# returns a list of tuple ('feed title', list of articles)
|
# returns a list of tuple ('feed title', list of articles)
|
||||||
# {
|
# {
|
||||||
@ -182,12 +187,19 @@ class OReillyPremium(BasicNewsRecipe):
|
|||||||
# 'content' : The full article (can be an empty string). This is used by FullContentProfile
|
# 'content' : The full article (can be an empty string). This is used by FullContentProfile
|
||||||
# }
|
# }
|
||||||
# this is used instead of BasicNewsRecipe.parse_feeds().
|
# this is used instead of BasicNewsRecipe.parse_feeds().
|
||||||
|
# it is called by download
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
# Parse the page into Python Soup
|
# Parse the page into Python Soup
|
||||||
|
print("Entering recipe print_index from:")
|
||||||
|
traceback.print_stack()
|
||||||
|
print("web")
|
||||||
baseURL = "https://www.billoreilly.com"
|
baseURL = "https://www.billoreilly.com"
|
||||||
return self.parseGeneric(baseURL)
|
masterList = self.parseGeneric(baseURL)
|
||||||
|
#print(masterList)
|
||||||
|
return masterList
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
print("In preprocess_html")
|
||||||
refresh = soup.find('meta', {'http-equiv':'refresh'})
|
refresh = soup.find('meta', {'http-equiv':'refresh'})
|
||||||
if refresh is None:
|
if refresh is None:
|
||||||
return soup
|
return soup
|
||||||
@ -195,3 +207,128 @@ class OReillyPremium(BasicNewsRecipe):
|
|||||||
raw = self.browser.open('https://www.billoreilly.com'+content).read()
|
raw = self.browser.open('https://www.billoreilly.com'+content).read()
|
||||||
return BeautifulSoup(raw.decode('cp1252', 'replace'))
|
return BeautifulSoup(raw.decode('cp1252', 'replace'))
|
||||||
|
|
||||||
|
def build_index(self):
|
||||||
|
print("In OReilly build_index()\n\n")
|
||||||
|
feedsRSS = []
|
||||||
|
self.report_progress(0, ('Fetching feeds...'))
|
||||||
|
#try:
|
||||||
|
feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
|
||||||
|
max_articles_per_feed=self.max_articles_per_feed,
|
||||||
|
log=self.log)
|
||||||
|
self.report_progress(0, ('Got feeds from index page'))
|
||||||
|
#except NotImplementedError:
|
||||||
|
# feeds = self.parse_feeds()
|
||||||
|
# Now add regular feeds.
|
||||||
|
feedsRSS = self.parse_feeds()
|
||||||
|
print ("feedsRSS is type "+feedsRSS.__class__.__name__)
|
||||||
|
|
||||||
|
for articles in feedsRSS:
|
||||||
|
print("articles is type "+articles.__class__.__name__)
|
||||||
|
print("Title:" + articles.title)
|
||||||
|
feeds.append(articles)
|
||||||
|
if not feeds:
|
||||||
|
raise ValueError('No articles found, aborting')
|
||||||
|
|
||||||
|
#feeds = FeedCollection(feeds)
|
||||||
|
|
||||||
|
self.report_progress(0, ('Trying to download cover...'))
|
||||||
|
self.download_cover()
|
||||||
|
self.report_progress(0, ('Generating masthead...'))
|
||||||
|
self.masthead_path = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
murl = self.get_masthead_url()
|
||||||
|
except:
|
||||||
|
self.log.exception('Failed to get masthead url')
|
||||||
|
murl = None
|
||||||
|
|
||||||
|
if murl is not None:
|
||||||
|
# Try downloading the user-supplied masthead_url
|
||||||
|
# Failure sets self.masthead_path to None
|
||||||
|
self.download_masthead(murl)
|
||||||
|
if self.masthead_path is None:
|
||||||
|
self.log.info("Synthesizing mastheadImage")
|
||||||
|
self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg')
|
||||||
|
try:
|
||||||
|
self.default_masthead_image(self.masthead_path)
|
||||||
|
except:
|
||||||
|
self.log.exception('Failed to generate default masthead image')
|
||||||
|
self.masthead_path = None
|
||||||
|
|
||||||
|
if self.test:
|
||||||
|
feeds = feeds[:2]
|
||||||
|
self.has_single_feed = len(feeds) == 1
|
||||||
|
|
||||||
|
index = os.path.join(self.output_dir, 'index.html')
|
||||||
|
|
||||||
|
html = self.feeds2index(feeds)
|
||||||
|
with open(index, 'wb') as fi:
|
||||||
|
fi.write(html)
|
||||||
|
|
||||||
|
self.jobs = []
|
||||||
|
|
||||||
|
if self.reverse_article_order:
|
||||||
|
for feed in feeds:
|
||||||
|
if hasattr(feed, 'reverse'):
|
||||||
|
feed.reverse()
|
||||||
|
|
||||||
|
self.feed_objects = feeds
|
||||||
|
for f, feed in enumerate(feeds):
|
||||||
|
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
|
||||||
|
if not os.path.isdir(feed_dir):
|
||||||
|
os.makedirs(feed_dir)
|
||||||
|
|
||||||
|
for a, article in enumerate(feed):
|
||||||
|
if a >= self.max_articles_per_feed:
|
||||||
|
break
|
||||||
|
art_dir = os.path.join(feed_dir, 'article_%d'%a)
|
||||||
|
if not os.path.isdir(art_dir):
|
||||||
|
os.makedirs(art_dir)
|
||||||
|
try:
|
||||||
|
url = self.print_version(article.url)
|
||||||
|
except NotImplementedError:
|
||||||
|
url = article.url
|
||||||
|
except:
|
||||||
|
self.log.exception('Failed to find print version for: '+article.url)
|
||||||
|
url = None
|
||||||
|
if not url:
|
||||||
|
continue
|
||||||
|
func, arg = (self.fetch_embedded_article, article) \
|
||||||
|
if self.use_embedded_content or (self.use_embedded_content == None and feed.has_embedded_content()) \
|
||||||
|
else \
|
||||||
|
((self.fetch_obfuscated_article if self.articles_are_obfuscated \
|
||||||
|
else self.fetch_article), url)
|
||||||
|
req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),
|
||||||
|
{}, (f, a), self.article_downloaded,
|
||||||
|
self.error_in_article_download)
|
||||||
|
req.feed = feed
|
||||||
|
req.article = article
|
||||||
|
req.feed_dir = feed_dir
|
||||||
|
self.jobs.append(req)
|
||||||
|
|
||||||
|
|
||||||
|
self.jobs_done = 0
|
||||||
|
tp = ThreadPool(self.simultaneous_downloads)
|
||||||
|
for req in self.jobs:
|
||||||
|
tp.putRequest(req, block=True, timeout=0)
|
||||||
|
|
||||||
|
|
||||||
|
self.report_progress(0, ('Starting download [%d thread(s)]...')%self.simultaneous_downloads)
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
tp.poll()
|
||||||
|
time.sleep(0.1)
|
||||||
|
except NoResultsPending:
|
||||||
|
break
|
||||||
|
for f, feed in enumerate(feeds):
|
||||||
|
print("Writing feeds for "+feed.title)
|
||||||
|
html = self.feed2index(f,feeds)
|
||||||
|
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
|
||||||
|
with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
|
||||||
|
fi.write(html)
|
||||||
|
self.create_opf(feeds)
|
||||||
|
self.report_progress(1, ('Feeds downloaded to %s')%index)
|
||||||
|
|
||||||
|
return index
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import urllib, re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class AdvancedUserRecipe1279258912(BasicNewsRecipe):
|
class AdvancedUserRecipe1279258912(BasicNewsRecipe):
|
||||||
@ -27,12 +28,30 @@ class AdvancedUserRecipe1279258912(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
conversion_options = {'linearize_tables':True}
|
conversion_options = {'linearize_tables':True}
|
||||||
masthead_url = 'http://www.orlandosentinel.com/media/graphic/2009-07/46844851.gif'
|
masthead_url = 'http://www.orlandosentinel.com/media/graphic/2009-07/46844851.gif'
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'class':'story'})
|
auto_cleanup = True
|
||||||
]
|
|
||||||
remove_tags = [
|
def get_article_url(self, article):
|
||||||
dict(name='div', attrs={'class':['articlerail','tools','comment-group','clearfix']}),
|
ans = None
|
||||||
]
|
try:
|
||||||
remove_tags_after = [
|
s = article.summary
|
||||||
dict(name='p', attrs={'class':'copyright'}),
|
ans = urllib.unquote(
|
||||||
]
|
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
if ans is None:
|
||||||
|
link = article.get('feedburner_origlink', None)
|
||||||
|
if link and link.split('/')[-1]=="story01.htm":
|
||||||
|
link=link.split('/')[-2]
|
||||||
|
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
|
||||||
|
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http:',
|
||||||
|
'0S':'//'}
|
||||||
|
for k, v in encoding.iteritems():
|
||||||
|
link = link.replace(k, v)
|
||||||
|
ans = link
|
||||||
|
elif link:
|
||||||
|
ans = link
|
||||||
|
if ans is not None:
|
||||||
|
return ans.replace('?track=rss', '')
|
||||||
|
|
||||||
|
|
||||||
|
@ -14,6 +14,7 @@ class OurDailyBread(BasicNewsRecipe):
|
|||||||
language = 'en'
|
language = 'en'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
auto_cleanup = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
category = 'ODB, Daily Devotional, Bible, Christian Devotional, Devotional, RBC Ministries, Our Daily Bread, Devotionals, Daily Devotionals, Christian Devotionals, Faith, Bible Study, Bible Studies, Scripture, RBC, religion'
|
category = 'ODB, Daily Devotional, Bible, Christian Devotional, Devotional, RBC Ministries, Our Daily Bread, Devotionals, Daily Devotionals, Christian Devotionals, Faith, Bible Study, Bible Studies, Scripture, RBC, religion'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
@ -25,12 +26,12 @@ class OurDailyBread(BasicNewsRecipe):
|
|||||||
,'linearize_tables' : True
|
,'linearize_tables' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(attrs={'class':'module-content'})]
|
#keep_only_tags = [dict(attrs={'class':'module-content'})]
|
||||||
remove_tags = [
|
#remove_tags = [
|
||||||
dict(attrs={'id':'article-zoom'})
|
#dict(attrs={'id':'article-zoom'})
|
||||||
,dict(attrs={'class':'listen-now-box'})
|
#,dict(attrs={'class':'listen-now-box'})
|
||||||
]
|
#]
|
||||||
remove_tags_after = dict(attrs={'class':'readable-area'})
|
#remove_tags_after = dict(attrs={'class':'readable-area'})
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
.text{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
|
.text{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
|
||||||
|
@ -17,21 +17,8 @@ class Overclock_pl(BasicNewsRecipe):
|
|||||||
remove_tags=[dict(name='span', attrs={'class':'info'}), dict(attrs={'class':'shareit'})]
|
remove_tags=[dict(name='span', attrs={'class':'info'}), dict(attrs={'class':'shareit'})]
|
||||||
feeds = [(u'Aktualno\u015bci', u'http://www.overclock.pl/rss.news.xml'), (u'Testy i recenzje', u'http://www.overclock.pl/rss.articles.xml')]
|
feeds = [(u'Aktualno\u015bci', u'http://www.overclock.pl/rss.news.xml'), (u'Testy i recenzje', u'http://www.overclock.pl/rss.articles.xml')]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
def append_page(self, soup, appendtag):
|
if 'articles/show' in url:
|
||||||
tag=soup.find(id='navigation')
|
return url.replace('show', 'showall')
|
||||||
if tag:
|
else:
|
||||||
nexturl=tag.findAll('option')
|
return url
|
||||||
tag.extract()
|
|
||||||
for nextpage in nexturl[2:]:
|
|
||||||
soup2 = self.index_to_soup(nextpage['value'])
|
|
||||||
pagetext = soup2.find(id='content')
|
|
||||||
pos = len(appendtag.contents)
|
|
||||||
appendtag.insert(pos, pagetext)
|
|
||||||
rem=appendtag.find(attrs={'alt':'Pierwsza'})
|
|
||||||
if rem:
|
|
||||||
rem.parent.extract()
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
self.append_page(soup, soup.body)
|
|
||||||
return soup
|
|
@ -10,5 +10,7 @@ class palmtop_pl(BasicNewsRecipe):
|
|||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
use_embedded_content=True
|
||||||
|
#remove_tags_before=dict(name='h2')
|
||||||
|
#remove_tags_after=dict(attrs={'class':'entry clearfix'})
|
||||||
feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]
|
feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]
|
||||||
|
@ -1,31 +1,32 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
class PC_Arena(BasicNewsRecipe):
|
class PC_Arena(BasicNewsRecipe):
|
||||||
title = u'PCArena'
|
title = u'PCArena'
|
||||||
oldest_article = 18300
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
|
description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
|
||||||
category = 'IT'
|
category = 'IT'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
masthead_url='http://pcarena.pl/public/design/frontend/images/logo.gif'
|
index='http://pcarena.pl'
|
||||||
cover_url= 'http://pcarena.pl/public/design/frontend/images/logo.gif'
|
masthead_url='http://pcarena.pl/pcarena/img/logo.png'
|
||||||
|
cover_url= 'http://pcarena.pl/pcarena/img/logo.png'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags=[dict(attrs={'class':['artHeader', 'art']})]
|
remove_empty_feeds=True
|
||||||
remove_tags=[dict(attrs={'class':'pages'})]
|
#keep_only_tags=[dict(attrs={'class':['artHeader', 'art']})]
|
||||||
feeds = [(u'Newsy', u'http://pcarena.pl/misc/rss/news'), (u'Artyku\u0142y', u'http://pcarena.pl/misc/rss/articles')]
|
#remove_tags=[dict(attrs={'class':'pages'})]
|
||||||
|
feeds = [(u'Aktualności', u'http://pcarena.pl/aktualnosci/feeds.rss'), (u'Testy', u'http://pcarena.pl/testy/feeds.rss'), (u'Software', u'http://pcarena.pl/oprogramowanie/feeds.rss'), (u'Poradniki', u'http://pcarena.pl/poradniki/feeds.rss'), (u'Mobile', u'http://pcarena.pl/mobile/feeds.rss')]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url.replace('show', 'print')
|
||||||
|
|
||||||
def append_page(self, soup, appendtag):
|
def image_url_processor(self, baseurl, url):
|
||||||
tag=soup.find(name='div', attrs={'class':'pagNum'})
|
if 'http' not in url:
|
||||||
if tag:
|
return 'http://pcarena.pl' + url
|
||||||
nexturl=tag.findAll('a')
|
else:
|
||||||
tag.extract()
|
return url
|
||||||
for nextpage in nexturl[1:]:
|
|
||||||
nextpage= 'http://pcarena.pl' + nextpage['href']
|
|
||||||
soup2 = self.index_to_soup(nextpage)
|
|
||||||
pagetext = soup2.find(attrs={'class':'artBody'})
|
|
||||||
pos = len(appendtag.contents)
|
|
||||||
appendtag.insert(pos, pagetext)
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
self.append_page(soup, soup.body)
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.index + a['href']
|
||||||
return soup
|
return soup
|
@ -10,32 +10,11 @@ class PC_Centre(BasicNewsRecipe):
|
|||||||
masthead_url= 'http://pccentre.pl/views/images/logo.gif'
|
masthead_url= 'http://pccentre.pl/views/images/logo.gif'
|
||||||
cover_url= 'http://pccentre.pl/views/images/logo.gif'
|
cover_url= 'http://pccentre.pl/views/images/logo.gif'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags= [dict(id='content')]
|
remove_empty_feeds = True
|
||||||
remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
|
#keep_only_tags= [dict(id='content')]
|
||||||
feeds = [(u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n§ion=2'), (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n§ion=3'), (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n§ion=4'), (u'Internet', u'http://pccentre.pl/backend.php?mode=n§ion=7'), (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n§ion=5'), (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n§ion=6'), (u'Biznes', u'http://pccentre.pl/backend.php?mode=n§ion=9')]
|
#remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
|
||||||
|
remove_tags=[dict(attrs={'class':'logo_print'})]
|
||||||
|
feeds = [(u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), (u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n§ion=2'), (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n§ion=3'), (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n§ion=4'), (u'Internet', u'http://pccentre.pl/backend.php?mode=n§ion=7'), (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n§ion=5'), (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n§ion=6'), (u'Biznes', u'http://pccentre.pl/backend.php?mode=n§ion=9')]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
def append_page(self, soup, appendtag):
|
return url.replace('show', 'print')
|
||||||
tag=soup.find(name='div', attrs={'class':'pages'})
|
|
||||||
if tag:
|
|
||||||
nexturl=tag.findAll('a')
|
|
||||||
tag.extract()
|
|
||||||
for nextpage in nexturl[:-1]:
|
|
||||||
nextpage= 'http://pccentre.pl' + nextpage['href']
|
|
||||||
soup2 = self.index_to_soup(nextpage)
|
|
||||||
pagetext = soup2.find(id='content')
|
|
||||||
rem=pagetext.findAll(attrs={'class':['subtitle', 'content_info', 'list_of_content', 'pages', 'social2', 'pcc_acc', 'pcc_acc_na']})
|
|
||||||
for r in rem:
|
|
||||||
r.extract()
|
|
||||||
rem=pagetext.findAll(id='comments')
|
|
||||||
for r in rem:
|
|
||||||
r.extract()
|
|
||||||
rem=pagetext.findAll('h1')
|
|
||||||
for r in rem:
|
|
||||||
r.extract()
|
|
||||||
pos = len(appendtag.contents)
|
|
||||||
appendtag.insert(pos, pagetext)
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
self.append_page(soup, soup.body)
|
|
||||||
return soup
|
|
@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
readitlaterlist.com
|
Pocket Calibre Recipe v1.0
|
||||||
"""
|
"""
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '''
|
__copyright__ = '''
|
||||||
@ -12,22 +12,23 @@ from calibre import strftime
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Readitlater(BasicNewsRecipe):
|
class Pocket(BasicNewsRecipe):
|
||||||
title = 'ReadItLater'
|
title = 'Pocket'
|
||||||
__author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
|
__author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
|
||||||
description = '''Personalized news feeds. Go to readitlaterlist.com to setup \
|
description = '''Personalized news feeds. Go to getpocket.com to setup up \
|
||||||
up your news. This version displays pages of articles from \
|
your news. This version displays pages of articles from \
|
||||||
oldest to newest, with max & minimum counts, and marks articles \
|
oldest to newest, with max & minimum counts, and marks articles \
|
||||||
read after downloading.'''
|
read after downloading.'''
|
||||||
publisher = 'readitlaterlist.com'
|
publisher = 'getpocket.com'
|
||||||
category = 'news, custom'
|
category = 'news, custom'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 50
|
max_articles_per_feed = 50
|
||||||
minimum_articles = 1
|
minimum_articles = 10
|
||||||
|
mark_as_read_after_dl = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
INDEX = u'http://readitlaterlist.com'
|
INDEX = u'http://getpocket.com'
|
||||||
LOGIN = INDEX + u'/l'
|
LOGIN = INDEX + u'/l'
|
||||||
readList = []
|
readList = []
|
||||||
|
|
||||||
@ -100,9 +101,31 @@ class Readitlater(BasicNewsRecipe):
|
|||||||
br = self.get_browser()
|
br = self.get_browser()
|
||||||
for link in markList:
|
for link in markList:
|
||||||
url = self.INDEX + link
|
url = self.INDEX + link
|
||||||
|
print 'Marking read: ', url
|
||||||
response = br.open(url)
|
response = br.open(url)
|
||||||
response
|
print response.info()
|
||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
self.mark_as_read(self.readList)
|
if self.mark_as_read_after_dl:
|
||||||
|
self.mark_as_read(self.readList)
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def default_cover(self, cover_file):
|
||||||
|
'''
|
||||||
|
Create a generic cover for recipes that don't have a cover
|
||||||
|
This override adds time to the cover
|
||||||
|
'''
|
||||||
|
try:
|
||||||
|
from calibre.ebooks import calibre_cover
|
||||||
|
title = self.title if isinstance(self.title, unicode) else \
|
||||||
|
self.title.decode('utf-8', 'replace')
|
||||||
|
date = strftime(self.timefmt)
|
||||||
|
time = strftime('[%I:%M %p]')
|
||||||
|
img_data = calibre_cover(title, date, time)
|
||||||
|
cover_file.write(img_data)
|
||||||
|
cover_file.flush()
|
||||||
|
except:
|
||||||
|
self.log.exception('Failed to generate default cover')
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
# Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug
|
# Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
|
from urlparse import urlparse
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import NavigableString
|
from calibre.ebooks.BeautifulSoup import NavigableString
|
||||||
|
|
||||||
@ -20,12 +22,13 @@ class RealClear(BasicNewsRecipe):
|
|||||||
# Don't go down
|
# Don't go down
|
||||||
recursions = 0
|
recursions = 0
|
||||||
max_articles_per_feed = 400
|
max_articles_per_feed = 400
|
||||||
debugMessages = False
|
debugMessages = True
|
||||||
|
|
||||||
# Numeric parameter is type, controls whether we look for
|
# Numeric parameter is type, controls whether we look for
|
||||||
feedsets = [
|
feedsets = [
|
||||||
["Politics", "http://www.realclearpolitics.com/index.xml", 0],
|
["Politics", "http://www.realclearpolitics.com/index.xml", 0],
|
||||||
["Science", "http://www.realclearscience.com/index.xml", 0],
|
["Policy", "http://www.realclearpolicy.com/index.xml", 0],
|
||||||
|
["Science", "http://www.realclearscience.com/index.xml", 0],
|
||||||
["Tech", "http://www.realcleartechnology.com/index.xml", 0],
|
["Tech", "http://www.realcleartechnology.com/index.xml", 0],
|
||||||
# The feedburner is essentially the same as the top feed, politics.
|
# The feedburner is essentially the same as the top feed, politics.
|
||||||
# ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1],
|
# ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1],
|
||||||
@ -37,7 +40,9 @@ class RealClear(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
# Hints to extractPrintURL.
|
# Hints to extractPrintURL.
|
||||||
# First column is the URL snippet. Then the string to search for as text, and the attributes to look for above it. Start with attributes and drill down.
|
# First column is the URL snippet. Then the string to search for as text, and the attributes to look for above it. Start with attributes and drill down.
|
||||||
printhints = [
|
phUrlSnip, phLinkText, phMainSearch, phHrefSearch = range(4)
|
||||||
|
|
||||||
|
printhints = [ ["realclear", "", '' , 'printpage'],
|
||||||
["billoreilly.com", "Print this entry", 'a', ''],
|
["billoreilly.com", "Print this entry", 'a', ''],
|
||||||
["billoreilly.com", "Print This Article", 'a', ''],
|
["billoreilly.com", "Print This Article", 'a', ''],
|
||||||
["politico.com", "Print", 'a', 'share-print'],
|
["politico.com", "Print", 'a', 'share-print'],
|
||||||
@ -48,11 +53,24 @@ class RealClear(BasicNewsRecipe):
|
|||||||
# usatoday - just prints with all current crap anyhow
|
# usatoday - just prints with all current crap anyhow
|
||||||
|
|
||||||
]
|
]
|
||||||
|
# RCP - look for a strange compound. See http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879.html
|
||||||
|
# The print link isn't obvious, and only the end is needed (the -full append.) SO maybe try that first?s
|
||||||
|
# http://www.realclearpolitics.com/printpage/?url=http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879-full.html
|
||||||
|
# Single page articles don't have a _full; e.g. http://www.realclearpolitics.com/articles/2012/01/25/obamas_green_robber_barons_112897.html
|
||||||
|
# Use the FULL PRINTPAGE URL; it formats it better too!
|
||||||
|
#
|
||||||
|
# NYT - try single page...
|
||||||
|
# Need special code - is it one page or several? Which URL?
|
||||||
|
# from http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1
|
||||||
|
# to http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1&pagewanted=all
|
||||||
|
# which is at link rel="canonical" and at <meta property="og:url" or look for "Single Page"
|
||||||
|
|
||||||
# Returns the best-guess print url.
|
# Returns the best-guess print url.
|
||||||
# The second parameter (pageURL) is returned if nothing is found.
|
# The second parameter (pageURL) is returned if nothing is found.
|
||||||
def extractPrintURL(self, pageURL):
|
def extractPrintURL(self, pageURL):
|
||||||
tagURL = pageURL
|
tagURL = pageURL
|
||||||
|
baseParse = urlparse(pageURL)
|
||||||
|
baseURL = baseParse[0]+"://"+baseParse[1]
|
||||||
hintsCount =len(self.printhints)
|
hintsCount =len(self.printhints)
|
||||||
for x in range(0,hintsCount):
|
for x in range(0,hintsCount):
|
||||||
if pageURL.find(self.printhints[x][0])== -1 :
|
if pageURL.find(self.printhints[x][0])== -1 :
|
||||||
@ -62,23 +80,37 @@ class RealClear(BasicNewsRecipe):
|
|||||||
soup = self.index_to_soup(pageURL)
|
soup = self.index_to_soup(pageURL)
|
||||||
if soup is None:
|
if soup is None:
|
||||||
return pageURL
|
return pageURL
|
||||||
if len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
|
if len(self.printhints[x][self.phHrefSearch])>0 and len(self.printhints[x][self.phLinkText]) == 0:
|
||||||
|
# e.g. RealClear
|
||||||
if self.debugMessages == True :
|
if self.debugMessages == True :
|
||||||
print("search1")
|
print("Search by href: "+self.printhints[x][self.phHrefSearch])
|
||||||
|
printFind = soup.find(href=re.compile(self.printhints[x][self.phHrefSearch]))
|
||||||
|
elif len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
|
||||||
|
if self.debugMessages == True :
|
||||||
|
print("Search 1: "+self.printhints[x][2]+" Attributes: ")
|
||||||
|
print(self.printhints[x][3])
|
||||||
printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3])
|
printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3])
|
||||||
elif len(self.printhints[x][3])>0 :
|
elif len(self.printhints[x][3])>0 :
|
||||||
if self.debugMessages == True :
|
if self.debugMessages == True :
|
||||||
print("search2")
|
print("search2")
|
||||||
printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3], text=self.printhints[x][1])
|
printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3], text=self.printhints[x][1])
|
||||||
else :
|
else :
|
||||||
|
if self.debugMessages == True:
|
||||||
|
print("Default Search: "+self.printhints[x][2]+" Text: "+self.printhints[x][1])
|
||||||
printFind = soup.find(self.printhints[x][2], text=self.printhints[x][1])
|
printFind = soup.find(self.printhints[x][2], text=self.printhints[x][1])
|
||||||
if printFind is None:
|
if printFind is None:
|
||||||
if self.debugMessages == True :
|
if self.debugMessages == True :
|
||||||
print("Not Found")
|
print("Not Found")
|
||||||
|
# print(soup)
|
||||||
|
print("end soup\n\n");
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(printFind)
|
print(printFind)
|
||||||
if isinstance(printFind, NavigableString)==False:
|
if isinstance(printFind, NavigableString)==False:
|
||||||
if printFind['href'] is not None:
|
if printFind['href'] is not None:
|
||||||
|
print("Check "+printFind['href']+" for base of "+baseURL)
|
||||||
|
if printFind['href'].find("http")!=0 :
|
||||||
|
return baseURL+printFind['href']
|
||||||
return printFind['href']
|
return printFind['href']
|
||||||
tag = printFind.parent
|
tag = printFind.parent
|
||||||
print(tag)
|
print(tag)
|
||||||
@ -158,6 +190,7 @@ class RealClear(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
# Parse the page into Python Soup
|
# Parse the page into Python Soup
|
||||||
|
|
||||||
|
#articleList = []
|
||||||
ans = []
|
ans = []
|
||||||
feedsCount = len(self.feedsets)
|
feedsCount = len(self.feedsets)
|
||||||
for x in range(0,feedsCount): # should be ,4
|
for x in range(0,feedsCount): # should be ,4
|
||||||
@ -168,3 +201,4 @@ class RealClear(BasicNewsRecipe):
|
|||||||
print(ans)
|
print(ans)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ Rue89
|
|||||||
|
|
||||||
__author__ = '2010-2012, Louis Gesbert <meta at antislash dot info>'
|
__author__ = '2010-2012, Louis Gesbert <meta at antislash dot info>'
|
||||||
|
|
||||||
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Rue89(BasicNewsRecipe):
|
class Rue89(BasicNewsRecipe):
|
||||||
@ -15,23 +16,24 @@ class Rue89(BasicNewsRecipe):
|
|||||||
title = u'Rue89'
|
title = u'Rue89'
|
||||||
language = 'fr'
|
language = 'fr'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 12
|
max_articles_per_feed = 50
|
||||||
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
# From http://www.rue89.com/les-flux-rss-de-rue89
|
# From http://www.rue89.com/les-flux-rss-de-rue89
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'La Une', u'http://www.rue89.com/feed'),
|
(u'La Une', u'http://www.rue89.com/feed'),
|
||||||
(u'Rue69', u'http://www.rue89.com/rue69/feed'),
|
# Other feeds disabled, 'La Une' seems to include them all
|
||||||
(u'Eco', u'http://www.rue89.com/rue89-eco/feed'),
|
# (u'Rue69', u'http://www.rue89.com/rue69/feed'),
|
||||||
(u'Planète', u'http://www.rue89.com/rue89-planete/feed'),
|
# (u'Eco', u'http://www.rue89.com/rue89-eco/feed'),
|
||||||
(u'Sport', u'http://www.rue89.com/rue89-sport/feed'),
|
# (u'Planète', u'http://www.rue89.com/rue89-planete/feed'),
|
||||||
(u'Culture', u'http://www.rue89.com/culture/feed'),
|
# (u'Sport', u'http://www.rue89.com/rue89-sport/feed'),
|
||||||
(u'Hi-tech', u'http://www.rue89.com/hi-tech/feed'),
|
# (u'Culture', u'http://www.rue89.com/culture/feed'),
|
||||||
(u'Media', u'http://www.rue89.com/medias/feed'),
|
# (u'Hi-tech', u'http://www.rue89.com/hi-tech/feed'),
|
||||||
(u'Monde', u'http://www.rue89.com/monde/feed'),
|
# (u'Media', u'http://www.rue89.com/medias/feed'),
|
||||||
(u'Politique', u'http://www.rue89.com/politique/feed'),
|
# (u'Monde', u'http://www.rue89.com/monde/feed'),
|
||||||
(u'Societe', u'http://www.rue89.com/societe/feed'),
|
# (u'Politique', u'http://www.rue89.com/politique/feed'),
|
||||||
|
# (u'Societe', u'http://www.rue89.com/societe/feed'),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Follow redirection from feedsportal.com
|
# Follow redirection from feedsportal.com
|
||||||
@ -41,19 +43,36 @@ class Rue89(BasicNewsRecipe):
|
|||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '?imprimer=1'
|
return url + '?imprimer=1'
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
|
|
||||||
conversion_options = { 'smarten_punctuation' : True }
|
conversion_options = { 'smarten_punctuation' : True }
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id':'article'}),
|
dict(name='div', attrs={'id':'content'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = [
|
remove_tags_after = [
|
||||||
dict(name='div', attrs={'id':'plus_loin'}),
|
dict(name='div', attrs={'id':'plus_loin'}),
|
||||||
|
dict(name='div', attrs={'class':'stats'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':'article_tools'}),
|
dict(name='div', attrs={'id':'article_tools'}),
|
||||||
dict(name='div', attrs={'id':'plus_loin'}),
|
dict(name='div', attrs={'id':'plus_loin'}),
|
||||||
|
dict(name='div', attrs={'class':'stats'}),
|
||||||
|
dict(name='div', attrs={'class':'tools'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
extra_css = "#content { padding: 0 0; }"
|
||||||
|
|
||||||
|
# Without this, parsing of video articles returns strange results
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'<script.*?</script>', re.IGNORECASE|re.DOTALL), ''),
|
||||||
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
# Remove whole article if it's a "zapnet" (video)
|
||||||
|
if soup.find('h1', {'class':'zapnet_title'}):
|
||||||
|
return None
|
||||||
|
# Reduce h2 titles to h3
|
||||||
|
for title in soup.findAll('h2'):
|
||||||
|
title.name = 'h3'
|
||||||
|
return soup
|
||||||
|
141
recipes/sol_haber.recipe
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Onur Gungor onurgu@gmail.com'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.sol.org.tr
|
||||||
|
'''
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
class SolHaberRecipe(BasicNewsRecipe):
|
||||||
|
title = u'soL Haber'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
|
language = 'tr'
|
||||||
|
__author__ = 'Onur Güngör'
|
||||||
|
description = 'Hayata soL''dan bakın..'
|
||||||
|
publisher = 'soL Haber'
|
||||||
|
tags = 'news, haberler, siyaset, türkiye, turkey, politics'
|
||||||
|
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : tags
|
||||||
|
, 'publisher' : publisher
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
|
category_dict = { 'sonuncu-kavga':'Sonuncu Kavga',
|
||||||
|
'devlet-ve-siyaset':'Devlet ve Siyaset',
|
||||||
|
'ekonomi':'Ekonomi',
|
||||||
|
'enternasyonal-gundem':'Enternasyonel Gündem',
|
||||||
|
'kent-gundemleri':'Kent Gündemleri',
|
||||||
|
'kultur-sanat':'Kültür Sanat',
|
||||||
|
'dunyadan':'Dünyadan',
|
||||||
|
'serbest-kursu':'Serbest Kürsü',
|
||||||
|
'medya':'Medya',
|
||||||
|
'liseliler':'Liseliler',
|
||||||
|
'yazarlar':'Köşe Yazıları'}
|
||||||
|
|
||||||
|
end_date = datetime.date.today().isoformat()
|
||||||
|
start_date = (datetime.date.today()-datetime.timedelta(days=1)).isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
section_tuples = [['Köşe Yazıları', 'http://haber.sol.org.tr/arsiv?icerik=kose_yazisi&tarih%%5Bmin%%5D%%5Bdate%%5D=%s&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date)],
|
||||||
|
['Haberler', 'http://haber.sol.org.tr/arsiv?icerik=haber&tarih%%5Bmin%%5D%%5Bdate%%5D=%s&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date)],
|
||||||
|
['soL postal', 'http://haber.sol.org.tr/arsiv?icerik=postal&tarih%%5Bmin%%5D%%5Bdate%%5D=%s&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date)],
|
||||||
|
['Bizim Amerika', 'http://haber.sol.org.tr/arsiv?icerik=bizim_amerika&tarih%%5Bmin%%5D%%5Bdate%%5D=%s&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date)]]
|
||||||
|
|
||||||
|
|
||||||
|
# Disable stylesheets from site.
|
||||||
|
no_stylesheets = True
|
||||||
|
|
||||||
|
cover_margins = (20, 20, '#ffffff')
|
||||||
|
|
||||||
|
storybody_reg_exp = '^\s*(haber|kose)\s*$'
|
||||||
|
|
||||||
|
comments_reg_exp = '^\s*makale-elestiri\s*$'
|
||||||
|
|
||||||
|
remove_tags = [dict(name='div', attrs={'class':re.compile(comments_reg_exp, re.IGNORECASE)})]
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'class':re.compile(storybody_reg_exp, re.IGNORECASE)})]
|
||||||
|
|
||||||
|
def get_masthead_title(self):
|
||||||
|
return self.title + "(" + self.end_date + ")"
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
|
||||||
|
result = []
|
||||||
|
articles_dict = dict()
|
||||||
|
|
||||||
|
author_regexp = re.compile('^http://.*?/yazarlar/(.*?)/.*$')
|
||||||
|
category_regexp = re.compile('^http://.*?/(.+?)/.*$')
|
||||||
|
|
||||||
|
for section_tuple in self.section_tuples:
|
||||||
|
|
||||||
|
section_title = section_tuple[0]
|
||||||
|
section_index_url = section_tuple[1]
|
||||||
|
|
||||||
|
self.log('Bölüm:', section_title, 'URL:', section_index_url)
|
||||||
|
|
||||||
|
soup = self.index_to_soup(section_index_url)
|
||||||
|
|
||||||
|
logo = soup.find('div', id='logo').find('img', src=True)
|
||||||
|
if logo is not None:
|
||||||
|
self.cover_url = logo['src']
|
||||||
|
if self.cover_url.startswith('/'):
|
||||||
|
self.cover_url = 'http://haber.sol.org.tr'+self.cover_url
|
||||||
|
|
||||||
|
view_content = soup.find('div', id='ana-icerik').find('div', attrs={'class':'view-content'})
|
||||||
|
if view_content == None:
|
||||||
|
break
|
||||||
|
rows = view_content.find('tbody').findAll('tr')
|
||||||
|
|
||||||
|
self.log('Row sayısı', len(rows))
|
||||||
|
for row in rows:
|
||||||
|
cells = row.findAll('td')
|
||||||
|
|
||||||
|
a = cells[1].find('a', href=True)
|
||||||
|
|
||||||
|
url = a['href']
|
||||||
|
title = self.tag_to_string(a)
|
||||||
|
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = 'http://haber.sol.org.tr'+url
|
||||||
|
|
||||||
|
category = section_title
|
||||||
|
category_match_result = category_regexp.match(url)
|
||||||
|
if category_match_result:
|
||||||
|
category = category_match_result.group(1)
|
||||||
|
|
||||||
|
date = self.tag_to_string(cells[2])
|
||||||
|
|
||||||
|
author = 'soL haber'
|
||||||
|
|
||||||
|
author_match_result = author_regexp.match(url)
|
||||||
|
if author_match_result:
|
||||||
|
author = author_match_result.group(1)
|
||||||
|
|
||||||
|
self.log('\tFound article:', title, 'at', url, 'published at ', date, 'by', author)
|
||||||
|
article = {'title':title, 'url':url, 'description':None, 'date':date, 'author':author}
|
||||||
|
if category in articles_dict:
|
||||||
|
articles_dict[category].append(article)
|
||||||
|
else:
|
||||||
|
articles_dict[category] = [article]
|
||||||
|
|
||||||
|
for category in articles_dict.keys():
|
||||||
|
if category in self.category_dict:
|
||||||
|
result.append((self.category_dict[category], articles_dict[category]))
|
||||||
|
else:
|
||||||
|
result.append((category, articles_dict[category]))
|
||||||
|
|
||||||
|
return result
|
@ -15,6 +15,8 @@ class Soldiers(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
auto_cleanup = True
|
||||||
|
auto_cleanup_keep = '//div[@id="mediaWrapper"]'
|
||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 1
|
||||||
delay = 4
|
delay = 4
|
||||||
max_connections = 1
|
max_connections = 1
|
||||||
@ -31,14 +33,14 @@ class Soldiers(BasicNewsRecipe):
|
|||||||
, 'language' : language
|
, 'language' : language
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]
|
#keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]
|
||||||
|
|
||||||
remove_tags = [
|
#remove_tags = [
|
||||||
dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
|
#dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
|
||||||
,dict(name=['object','link'])
|
#,dict(name=['object','link'])
|
||||||
]
|
#]
|
||||||
|
|
||||||
feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml' )]
|
feeds = [(u'Frontpage', u'http://www.army.mil/rss/2/' )]
|
||||||
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
|
136
recipes/southernstar.recipe
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, watou'
|
||||||
|
'''
|
||||||
|
southernstar.ie
|
||||||
|
'''
|
||||||
|
import re
|
||||||
|
import tempfile
|
||||||
|
import os
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
||||||
|
|
||||||
|
class TheSouthernStar(BasicNewsRecipe):
|
||||||
|
|
||||||
|
title = 'The Southern Star'
|
||||||
|
__author__ = 'watou'
|
||||||
|
description = 'West Cork\'s leading news and information provider since 1889'
|
||||||
|
NEWS_INDEX = 'http://www.southernstar.ie/news.php'
|
||||||
|
LOCAL_NOTES = 'http://www.southernstar.ie/localnotes.php'
|
||||||
|
SPORT_INDEX = 'http://www.southernstar.ie/sport.php'
|
||||||
|
CLASSIFIEDS = 'http://www.southernstar.ie/classifieds.php'
|
||||||
|
language = 'en_IE'
|
||||||
|
encoding = 'cp1252'
|
||||||
|
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
masthead_url = 'http://www.southernstar.ie/images/logo.gif'
|
||||||
|
remove_tags_before = dict(name='div', attrs={'class':'article'})
|
||||||
|
remove_tags_after = dict(name='div', attrs={'class':'article'})
|
||||||
|
remove_tags = [dict(name='div', attrs={'style':'width:300px; position:relative'}),
|
||||||
|
dict(name='form'),
|
||||||
|
dict(name='div', attrs={'class':'endpanel'})]
|
||||||
|
no_stylesheets = True
|
||||||
|
tempfiles = []
|
||||||
|
pubdate = ''
|
||||||
|
|
||||||
|
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
feeds = []
|
||||||
|
seen_titles = set([])
|
||||||
|
|
||||||
|
articles = self.fetch_ss_articles(self.NEWS_INDEX, seen_titles)
|
||||||
|
if articles:
|
||||||
|
feeds.append(('News', articles))
|
||||||
|
|
||||||
|
articles = self.fetch_ss_notes(self.LOCAL_NOTES)
|
||||||
|
if articles:
|
||||||
|
feeds.append(('Local Notes', articles))
|
||||||
|
|
||||||
|
articles = self.fetch_ss_articles(self.SPORT_INDEX, seen_titles)
|
||||||
|
if articles:
|
||||||
|
feeds.append(('Sport', articles))
|
||||||
|
|
||||||
|
articles = self.fetch_ss_notes(self.CLASSIFIEDS)
|
||||||
|
if articles:
|
||||||
|
feeds.append(('Classifieds', articles))
|
||||||
|
|
||||||
|
return feeds
|
||||||
|
|
||||||
|
def fetch_ss_articles(self, index, seen_titles):
|
||||||
|
articles = []
|
||||||
|
soup = self.index_to_soup(index)
|
||||||
|
ts = soup.find('div', {'class':'article'})
|
||||||
|
ds = self.tag_to_string(ts.find('strong'))
|
||||||
|
self.pubdate = ' ['+ds+']'
|
||||||
|
self.timefmt = ' [%s]'%ds
|
||||||
|
|
||||||
|
for post in ts.findAll('h1'):
|
||||||
|
a = post.find('a', href=True)
|
||||||
|
title = self.tag_to_string(a)
|
||||||
|
if title in seen_titles:
|
||||||
|
continue
|
||||||
|
seen_titles.add(title)
|
||||||
|
url = a['href']
|
||||||
|
if url.startswith('article'):
|
||||||
|
url = 'http://www.southernstar.ie/'+url
|
||||||
|
self.log('\tFound article:', title, 'at', url)
|
||||||
|
p = post.findNextSibling('p')
|
||||||
|
desc = None
|
||||||
|
if p is not None:
|
||||||
|
desc = str(p)
|
||||||
|
articles.append({'title':title, 'url':url, 'description':desc,
|
||||||
|
'date':self.pubdate})
|
||||||
|
|
||||||
|
return articles
|
||||||
|
|
||||||
|
def fetch_ss_notes(self, page):
|
||||||
|
articles = []
|
||||||
|
|
||||||
|
soup = self.index_to_soup(page)
|
||||||
|
ts = soup.find('div', {'class':'content'})
|
||||||
|
for post in ts.findAll('h1'):
|
||||||
|
title = self.tag_to_string(post)
|
||||||
|
self.log('\tFound note:', title)
|
||||||
|
f = tempfile.NamedTemporaryFile(suffix='.html',delete=False)
|
||||||
|
f.close()
|
||||||
|
f = codecs.open(f.name, 'w+b', self.encoding, 'replace')
|
||||||
|
url = "file://" + f.name
|
||||||
|
f.write(u'<html><head><meta http-equiv="Content-Type" content="text/html; charset='+
|
||||||
|
self.encoding+'"></head><body><h1>'+title+'</h1>')
|
||||||
|
f.write(str(post.findNextSibling('p')))
|
||||||
|
f.write(u'</body></html>')
|
||||||
|
self.log('\tWrote note to', f.name)
|
||||||
|
f.close()
|
||||||
|
self.tempfiles.append(f)
|
||||||
|
articles.append({'title':title, 'url':url, 'date':self.pubdate})
|
||||||
|
|
||||||
|
return articles
|
||||||
|
|
||||||
|
def postprocess_html(self, soup, first):
|
||||||
|
for table in soup.findAll('table', align='right'):
|
||||||
|
img = table.find('img')
|
||||||
|
if img is not None:
|
||||||
|
img.extract()
|
||||||
|
caption = self.tag_to_string(table).strip()
|
||||||
|
div = Tag(soup, 'div')
|
||||||
|
div['style'] = 'text-align:center'
|
||||||
|
div.insert(0, img)
|
||||||
|
div.insert(1, Tag(soup, 'br'))
|
||||||
|
if caption:
|
||||||
|
div.insert(2, NavigableString(caption))
|
||||||
|
table.replaceWith(div)
|
||||||
|
|
||||||
|
return soup
|
||||||
|
|
||||||
|
def image_url_processor(self, baseurl, url):
|
||||||
|
return url.replace(' ','%20')
|
||||||
|
|
||||||
|
def cleanup(self):
|
||||||
|
self.log('cleaning up')
|
||||||
|
for f in self.tempfiles:
|
||||||
|
os.unlink(f.name)
|
||||||
|
self.tempfiles = []
|
25
recipes/swiat_obrazu.recipe
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Swiat_Obrazu(BasicNewsRecipe):
|
||||||
|
title = u'Swiat Obrazu'
|
||||||
|
__author__ = 'fenuks'
|
||||||
|
description = u'Internetowy Dziennik o Fotografii i Wideo www.SwiatObrazu.pl to źródło informacji o technice fotografii i wideo, o sprzęcie najbardziej znanych i uznanych firm: Canon, Nikon, Sony, Hasselblad i wielu innych. Znajdziecie tu programy do obróbki zdjęć, forum foto i forum wideo i galerie zdjęć. Codziennie najświeższe informacje: aktualności, testy, poradniki, wywiady, felietony. Swiatobrazu.pl stale organizuje konkursy oraz warsztaty fotograficzne i wideo.'
|
||||||
|
category = 'photography'
|
||||||
|
masthead_url = 'http://www.swiatobrazu.pl/img/logo.jpg'
|
||||||
|
cover_url = 'http://www.swiatobrazu.pl/img/logo.jpg'
|
||||||
|
language = 'pl'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_javascript= True
|
||||||
|
use_embedded_content = False
|
||||||
|
feeds = [(u'Wszystko', u'http://www.swiatobrazu.pl/rss')]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url + ',drukuj'
|
||||||
|
|
||||||
|
def image_url_processor(self, baseurl, url):
|
||||||
|
if 'http://' not in url or 'https://' not in url:
|
||||||
|
return 'http://www.swiatobrazu.pl' + url[5:]
|
||||||
|
else:
|
||||||
|
return url
|
@ -8,10 +8,11 @@ class Tablety_pl(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
|
cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
|
||||||
category = 'IT'
|
category = 'IT'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
use_embedded_content=True
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
||||||
remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
|
#remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
|
||||||
remove_tags_after=dict(name="div", attrs={'class':'snap_nopreview sharing robots-nocontent'})
|
#remove_tags_after=dict(name="footer", attrs={'class':'entry-footer clearfix'})
|
||||||
remove_tags=[dict(name='div', attrs={'class':'snap_nopreview sharing robots-nocontent'})]
|
#remove_tags=[dict(name='footer', attrs={'class':'entry-footer clearfix'}), dict(name='div', attrs={'class':'entry-comment-counter'})]
|
||||||
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
|
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
|
||||||
|
@ -34,4 +34,12 @@ class tanuki(BasicNewsRecipe):
|
|||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
self.append_page(soup, soup.body)
|
self.append_page(soup, soup.body)
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
if 'tanuki-anime' in soup.title.string.lower():
|
||||||
|
a['href']='http://anime.tanuki.pl' + a['href']
|
||||||
|
elif 'tanuki-manga' in soup.title.string.lower():
|
||||||
|
a['href']='http://manga.tanuki.pl' + a['href']
|
||||||
|
elif 'tanuki-czytelnia' in soup.title.string.lower():
|
||||||
|
a['href']='http://czytelnia.tanuki.pl' + a['href']
|
||||||
return soup
|
return soup
|
62
recipes/telam.recipe
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.telam.com.ar
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Telam(BasicNewsRecipe):
|
||||||
|
title = 'Telam'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = 'AGENCIA DE NOTICIAS DE LA REPUBLICA ARGENTINA'
|
||||||
|
publisher = 'Telam S.E.'
|
||||||
|
category = 'news, politics, Argentina'
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 200
|
||||||
|
no_stylesheets = True
|
||||||
|
encoding = 'utf8'
|
||||||
|
use_embedded_content = False
|
||||||
|
language = 'es_AR'
|
||||||
|
remove_empty_feeds = True
|
||||||
|
publication_type = 'newsportal'
|
||||||
|
masthead_url = 'http://www.telam.com.ar/front/imagenes/encabezado/logotelam.jpg'
|
||||||
|
extra_css = """
|
||||||
|
body{font-family: Arial,Helvetica,sans-serif }
|
||||||
|
img{margin-bottom: 0.4em; display:block}
|
||||||
|
"""
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : category
|
||||||
|
, 'publisher' : publisher
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
|
remove_tags = [dict(name=['meta','link'])]
|
||||||
|
remove_tags_before = dict(attrs={'class':'nota_fecha'})
|
||||||
|
remove_tags_after = dict(attrs={'class':'nota_completa'})
|
||||||
|
remove_attributes = ['lang']
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'Ultimas noticias', u'http://www.telam.com.ar/xml/rss/' )
|
||||||
|
,(u'Politica' , u'http://www.telam.com.ar/xml/rss/1')
|
||||||
|
,(u'Economia' , u'http://www.telam.com.ar/xml/rss/2')
|
||||||
|
,(u'Sociedad' , u'http://www.telam.com.ar/xml/rss/3')
|
||||||
|
,(u'Policiales' , u'http://www.telam.com.ar/xml/rss/4')
|
||||||
|
,(u'Internacionales' , u'http://www.telam.com.ar/xml/rss/6')
|
||||||
|
,(u'Espectaculos' , u'http://www.telam.com.ar/xml/rss/7')
|
||||||
|
,(u'Cultura' , u'http://www.telam.com.ar/xml/rss/8')
|
||||||
|
,(u'Deportes' , u'http://www.telam.com.ar/xml/rss/9')
|
||||||
|
,(u'Telam Investiga' , u'http://www.telam.com.ar/xml/rss/5')
|
||||||
|
]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
artid = url.rpartition('/')[2]
|
||||||
|
return 'http://www.telam.com.ar/?codProg=imprimir-nota&id=' + artid
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
@ -1,24 +1,23 @@
|
|||||||
import re
|
import re, mechanize
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'The Sun UK'
|
title = u'The Sun UK'
|
||||||
cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
|
|
||||||
|
|
||||||
description = 'A Recipe for The Sun tabloid UK - uses feed43'
|
description = 'A Recipe for The Sun tabloid UK'
|
||||||
__author__ = 'Dave Asbury'
|
__author__ = 'Dave Asbury'
|
||||||
# last updated 20/2/12
|
# last updated 7/4/12
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 15
|
max_articles_per_feed = 15
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
#auto_cleanup = True
|
||||||
|
#articles_are_obfuscated = True
|
||||||
|
|
||||||
masthead_url = 'http://www.thesun.co.uk/sol/img/global/Sun-logo.gif'
|
masthead_url = 'http://www.thesun.co.uk/sol/img/global/Sun-logo.gif'
|
||||||
encoding = 'cp1251'
|
encoding = 'UTF-8'
|
||||||
|
|
||||||
encoding = 'cp1252'
|
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -30,13 +29,14 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'<div class="foot-copyright".*?</div>', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
(re.compile(r'<div class="foot-copyright".*?</div>', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1'),dict(name='h2',attrs={'class' : 'medium centered'}),
|
dict(name='h1'),dict(name='h2',attrs={'class' : 'medium centered'}),
|
||||||
dict(name='div',attrs={'class' : 'text-center'}),
|
dict(name='div',attrs={'class' : 'text-center'}),
|
||||||
dict(name='div',attrs={'id' : 'bodyText'})
|
dict(name='div',attrs={'id' : 'bodyText'})
|
||||||
# dict(name='p')
|
# dict(name='p')
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags=[
|
remove_tags=[
|
||||||
#dict(name='head'),
|
#dict(name='head'),
|
||||||
dict(attrs={'class' : ['mystery-meat-link','ltbx-container','ltbx-var ltbx-hbxpn','ltbx-var ltbx-nav-loop','ltbx-var ltbx-url']}),
|
dict(attrs={'class' : ['mystery-meat-link','ltbx-container','ltbx-var ltbx-hbxpn','ltbx-var ltbx-nav-loop','ltbx-var ltbx-url']}),
|
||||||
@ -46,12 +46,46 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'News','http://feed43.com/2517447382644748.xml'),
|
(u'News','http://feed43.com/2517447382644748.xml'),
|
||||||
(u'Sport', u'http://feed43.com/4283846255668687.xml'),
|
(u'Sport', u'http://feed43.com/4283846255668687.xml'),
|
||||||
(u'Bizarre', u'http://feed43.com/0233840304242011.xml'),
|
(u'Bizarre', u'http://feed43.com/0233840304242011.xml'),
|
||||||
(u'Film',u'http://feed43.com/1307545221226200.xml'),
|
(u'Film',u'http://feed43.com/1307545221226200.xml'),
|
||||||
(u'Music',u'http://feed43.com/1701513435064132.xml'),
|
(u'Music',u'http://feed43.com/1701513435064132.xml'),
|
||||||
(u'Sun Woman',u'http://feed43.com/0022626854226453.xml'),
|
(u'Sun Woman',u'http://feed43.com/0022626854226453.xml'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
|
||||||
|
# look for the block containing the sun button and url
|
||||||
|
cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_84.gif);'})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#cov = soup.find(attrs={'id' : 'large'})
|
||||||
|
cov2 = str(cov)
|
||||||
|
|
||||||
|
cov2='http://www.politicshome.com'+cov2[9:-133]
|
||||||
|
#cov2 now contains url of the page containing pic
|
||||||
|
|
||||||
|
#cov2 now contains url of the page containing pic
|
||||||
|
soup = self.index_to_soup(cov2)
|
||||||
|
cov = soup.find(attrs={'id' : 'large'})
|
||||||
|
cov2 = str(cov)
|
||||||
|
cov2=cov2[27:-18]
|
||||||
|
#cov2 now is pic url, now go back to original function
|
||||||
|
|
||||||
|
br = mechanize.Browser()
|
||||||
|
br.set_handle_redirect(False)
|
||||||
|
try:
|
||||||
|
br.open_novisit(cov2)
|
||||||
|
cover_url = cov2
|
||||||
|
except:
|
||||||
|
cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
|
||||||
|
|
||||||
|
#cover_url = cov2
|
||||||
|
#cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
|
||||||
|
return cover_url
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,6 +11,8 @@ class TPM_uk(BasicNewsRecipe):
|
|||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Title says it all'
|
description = 'Title says it all'
|
||||||
publisher = "The Philosophers' Magazine"
|
publisher = "The Philosophers' Magazine"
|
||||||
|
recipe_disabled = ('This recipe has been disabled as the website has'
|
||||||
|
' started providing articles only in PDF form')
|
||||||
category = 'philosophy, news'
|
category = 'philosophy, news'
|
||||||
oldest_article = 25
|
oldest_article = 25
|
||||||
max_articles_per_feed = 200
|
max_articles_per_feed = 200
|
||||||
|
@ -1,71 +1,12 @@
|
|||||||
#!/usr/bin/python
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Trouw(BasicNewsRecipe):
|
class BasicUserRecipe1333905513(BasicNewsRecipe):
|
||||||
title = u'Trouw'
|
title = u'Trouw'
|
||||||
__author__ = u'JvdW'
|
__author__ = 'asalet_r'
|
||||||
|
language = 'nl'
|
||||||
description = u'Trouw de Verdieping'
|
description = u'Trouw de Verdieping'
|
||||||
oldest_article = 7
|
oldest_article = 1
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
language = u'nl'
|
auto_cleanup = True
|
||||||
simultaneous_downloads = 1
|
|
||||||
delay = 1
|
|
||||||
# timefmt = ' [%A, %d %B, %Y]'
|
|
||||||
timefmt = ''
|
|
||||||
no_stylesheets = True
|
|
||||||
cover_url = 'http://www.trouw.nl/template/ver2-0/images/trouw_logo.gif'
|
|
||||||
|
|
||||||
# keep_only_tags = [ dict(name='div', attrs={'id':'content'}) ]
|
feeds = [(u'Nederland', u'http://www.trouw.nl/nieuws/nederland/rss.xml'), (u'Buitenland', u'http://www.trouw.nl/nieuws/buitenland/rss.xml'), (u'Politiek', u'http://www.trouw.nl/nieuws/politiek/rss.xml'), (u'Economie', u'http://www.trouw.nl/nieuws/economie/rss.xml'), (u'Sport', u'http://www.trouw.nl/nieuws/sport/rss.xml'), (u'Cultuur', u'http://www.trouw.nl/nieuws/cultuur/rss.xml'), (u'Gezondheid', u'http://www.trouw.nl/nieuws/gezondheid/rss.xml'), (u'Onderwijs', u'http://www.trouw.nl/nieuws/onderwijs/rss.xml'), (u'Opinie', u'http://www.trouw.nl/opinie/rss.xml'), (u'Groen', u'http://www.trouw.nl/groen/rss.xml'), (u'Religie-Filosofie', u'http://www.trouw.nl/religie-filosofie/rss.xml'), (u'Schrijf', u'http://www.trouw.nl/schrijf/rss.xml'), (u'Moderne Manieren', u'http://www.trouw.nl/moderne-manieren/rss.xml')]
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'id' :'leaderboard' })
|
|
||||||
,dict(name='div', attrs={'class':'banner' })
|
|
||||||
,dict(name='div', attrs={'id' :'header' })
|
|
||||||
,dict(name='div', attrs={'class':'options' })
|
|
||||||
,dict(name='div', attrs={'id' :'menu_main' })
|
|
||||||
,dict(name='div', attrs={'id' :'menu_sub' })
|
|
||||||
,dict(name='div', attrs={'id' :'column_right' })
|
|
||||||
,dict(name='div', attrs={'class':'meta_information'})
|
|
||||||
,dict(name='div', attrs={'id' :'comments_form' })
|
|
||||||
,dict(name='div', attrs={'id' :'mailfriend' })
|
|
||||||
,dict(name='div', attrs={'id' :'footer' })
|
|
||||||
,dict(name='img', attrs={'id' :'dot_clear' })
|
|
||||||
]
|
|
||||||
|
|
||||||
keep_only_tags = [dict(id=['columns'])]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Algemen', u'http://www.trouw.nl/?service=rss'),
|
|
||||||
(u'Nederland', u'http://www.trouw.nl/nieuws/nederland/?service=rss'),
|
|
||||||
(u'Europa', u'http://www.trouw.nl/nieuws/europa/?service=rss'),
|
|
||||||
(u'Wereld', u'http://www.trouw.nl/nieuws/wereld/?service=rss'),
|
|
||||||
(u'Economie', u'http://www.trouw.nl/nieuws/economie/?service=rss'),
|
|
||||||
(u'Wetenschap', u'http://www.trouw.nl/nieuws/Wetenschap/?service=rss'),
|
|
||||||
(u'Groen', u'http://www.trouw.nl/groen/?service=rss'),
|
|
||||||
(u'Religie en Filosofie', u'http://www.trouw.nl/religie-filosofie/?service=rss'),
|
|
||||||
(u'Politiek', u'http://www.trouw.nl/nieuws/politiek/?service=rss'),
|
|
||||||
(u'Zorg', u'http://www.trouw.nl/nieuws/zorg/?service=rss'),
|
|
||||||
(u'Onderwijs', u'http://www.trouw.nl/onderwijs/nieuws/?service=rss'),
|
|
||||||
(u'Sport', u'http://www.trouw.nl/nieuws/sport/?service=rss'),
|
|
||||||
(u'Achtergrond', u'http://www.trouw.nl/achtergrond/?service=rss'),
|
|
||||||
(u'De Verdieping', u'http://www.trouw.nl/achtergrond/deverdieping/?service=rss'),
|
|
||||||
(u'Naschrift', u'http://www.trouw.nl/achtergrond/Naschrift/?service=rss'),
|
|
||||||
(u'Opinie', u'http://www.trouw.nl/opinie/?service=rss'),
|
|
||||||
(u'Podium', u'http://www.trouw.nl/opinie/podium/?service=rss'),
|
|
||||||
(u'Commentaar', u'http://www.trouw.nl/opinie/commentaar/?service=rss'),
|
|
||||||
(u'Cultuur', u'http://www.trouw.nl/cultuur/?service=rss'),
|
|
||||||
(u'Boeken', u'http://www.trouw.nl/cultuur/boeken/?service=rss'),
|
|
||||||
(u'Film', u'http://www.trouw.nl/cultuur/film/?service=rss'),
|
|
||||||
(u'Beeldende kunst', u'http://www.trouw.nl/cultuur/beeldendekunst/?service=rss'),
|
|
||||||
(u'Theater', u'http://www.trouw.nl/cultuur/theater/?service=rss'),
|
|
||||||
(u'Muziek', u'http://www.trouw.nl/cultuur/muziek/?service=rss'),
|
|
||||||
(u'Kinderen', u'http://www.trouw.nl/cultuur/kinderen/?service=rss'),
|
|
||||||
(u'Ontspanning', u'http://www.trouw.nl/ontspanning/?service=rss'),
|
|
||||||
(u'De Gids', u'http://www.trouw.nl/ontspanning/degids/?service=rss'),
|
|
||||||
(u'Moderne manieren', u'http://www.trouw.nl/ontspanning/modernemanieren/?service=rss'),
|
|
||||||
(u'Reizen', u'http://www.trouw.nl/ontspanning/reizen/?service=rss'),
|
|
||||||
(u'Koken', u'http://www.trouw.nl/ontspanning/koken/?service=rss')
|
|
||||||
]
|
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
return url + '?all=true'
|
|
||||||
|
@ -8,6 +8,7 @@ class webhosting_pl(BasicNewsRecipe):
|
|||||||
cover_url='http://webhosting.pl/images/logo.png'
|
cover_url='http://webhosting.pl/images/logo.png'
|
||||||
masthead_url='http://webhosting.pl/images/logo.png'
|
masthead_url='http://webhosting.pl/images/logo.png'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
|
index='http://webhosting.pl'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
@ -36,4 +37,10 @@ class webhosting_pl(BasicNewsRecipe):
|
|||||||
(u'Marketing', u'http://webhosting.pl/feed/rss/n/11535')]
|
(u'Marketing', u'http://webhosting.pl/feed/rss/n/11535')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('webhosting.pl', 'webhosting.pl/print')
|
return url.replace('webhosting.pl', 'webhosting.pl/print')
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for a in soup('a'):
|
||||||
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
|
a['href']=self.index + a['href']
|
||||||
|
return soup
|
@ -1,5 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
||||||
title = u'WNP'
|
title = u'WNP'
|
||||||
@ -8,10 +8,11 @@ class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
|||||||
description = u'Wirtualny Nowy Przemysł'
|
description = u'Wirtualny Nowy Przemysł'
|
||||||
category = 'economy'
|
category = 'economy'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
preprocess_regexps = [(re.compile(ur'Czytaj też:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Czytaj więcej:.*?</a>', re.DOTALL), lambda match: '')]
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
keep_only_tags = dict(name='div', attrs={'id':'contentText'})
|
remove_tags=[dict(attrs={'class':'printF'})]
|
||||||
feeds = [(u'Wiadomości gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'),
|
feeds = [(u'Wiadomości gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'),
|
||||||
(u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'),
|
(u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'),
|
||||||
(u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'),
|
(u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'),
|
||||||
@ -19,3 +20,7 @@ class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
|||||||
(u'Serwis Górnictwo', u'http://www.wnp.pl/rss/serwis_rss_4.xml'),
|
(u'Serwis Górnictwo', u'http://www.wnp.pl/rss/serwis_rss_4.xml'),
|
||||||
(u'Serwis Logistyka', u'http://www.wnp.pl/rss/serwis_rss_5.xml'),
|
(u'Serwis Logistyka', u'http://www.wnp.pl/rss/serwis_rss_5.xml'),
|
||||||
(u'Serwis IT', u'http://www.wnp.pl/rss/serwis_rss_6.xml')]
|
(u'Serwis IT', u'http://www.wnp.pl/rss/serwis_rss_6.xml')]
|
||||||
|
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return 'http://wnp.pl/drukuj/' +url[url.find(',')+1:]
|
@ -21,7 +21,7 @@ class XkcdCom(BasicNewsRecipe):
|
|||||||
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
oldest_article = 60
|
oldest_article = 60
|
||||||
keep_only_tags = [dict(id='middleContent')]
|
keep_only_tags = [dict(id='middleContainer')]
|
||||||
remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')]
|
remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')]
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
# turn image bubblehelp into a paragraph
|
# turn image bubblehelp into a paragraph
|
||||||
|
18
recipes/zerocalcare.recipe
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
__version__ = 'v1.0'
|
||||||
|
__date__ = '7, April 2012'
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1333705905(BasicNewsRecipe):
|
||||||
|
title = u'Zerocalcare'
|
||||||
|
__author__ = 'faber1971'
|
||||||
|
description = 'Free Italian Comics'
|
||||||
|
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
auto_cleanup = False
|
||||||
|
keep_only_tags = [
|
||||||
|
dict(name='div', attrs={'class':'main entry-content group'})
|
||||||
|
]
|
||||||
|
masthead_url = 'http://zerocalcare.it/wp-content/uploads/2011/11/zerocalcare-banner.jpg'
|
||||||
|
feeds = [(u'Zerocalcare', u'http://feeds.feedburner.com/Zerocalcareit')]
|
@ -26,7 +26,7 @@ def login_to_google(username, password):
|
|||||||
br.form['Email'] = username
|
br.form['Email'] = username
|
||||||
br.form['Passwd'] = password
|
br.form['Passwd'] = password
|
||||||
raw = br.submit().read()
|
raw = br.submit().read()
|
||||||
if re.search(br'<title>.*?Account Settings</title>', raw) is None:
|
if re.search(br'(?i)<title>.*?Account Settings</title>', raw) is None:
|
||||||
x = re.search(br'(?is)<title>.*?</title>', raw)
|
x = re.search(br'(?is)<title>.*?</title>', raw)
|
||||||
if x is not None:
|
if x is not None:
|
||||||
print ('Title of post login page: %s'%x.group())
|
print ('Title of post login page: %s'%x.group())
|
||||||
|
@ -48,7 +48,7 @@ class Push(Command):
|
|||||||
threads = []
|
threads = []
|
||||||
for host in (
|
for host in (
|
||||||
r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre',
|
r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre',
|
||||||
'kovid@leopard_test:calibre',
|
'kovid@ox:calibre',
|
||||||
r'kovid@win7:/cygdrive/c/Users/kovid/calibre',
|
r'kovid@win7:/cygdrive/c/Users/kovid/calibre',
|
||||||
):
|
):
|
||||||
rcmd = BASE_RSYNC + EXCLUDES + ['.', host]
|
rcmd = BASE_RSYNC + EXCLUDES + ['.', host]
|
||||||
|
@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
|
|||||||
from setup.installer.windows.wix import WixMixIn
|
from setup.installer.windows.wix import WixMixIn
|
||||||
|
|
||||||
OPENSSL_DIR = r'Q:\openssl'
|
OPENSSL_DIR = r'Q:\openssl'
|
||||||
QT_DIR = 'Q:\\Qt\\4.8.0'
|
QT_DIR = 'Q:\\Qt\\4.8.1'
|
||||||
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
|
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
|
||||||
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
|
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
|
||||||
SW = r'C:\cygwin\home\kovid\sw'
|
SW = r'C:\cygwin\home\kovid\sw'
|
||||||
|
@ -12,14 +12,14 @@ msgstr ""
|
|||||||
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
||||||
"devel@lists.alioth.debian.org>\n"
|
"devel@lists.alioth.debian.org>\n"
|
||||||
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
||||||
"PO-Revision-Date: 2011-12-14 19:48+0000\n"
|
"PO-Revision-Date: 2012-04-12 09:56+0000\n"
|
||||||
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
|
"Last-Translator: Dídac Rios <didac@niorcs.com>\n"
|
||||||
"Language-Team: Catalan <linux@softcatala.org>\n"
|
"Language-Team: Catalan <linux@softcatala.org>\n"
|
||||||
"MIME-Version: 1.0\n"
|
"MIME-Version: 1.0\n"
|
||||||
"Content-Type: text/plain; charset=UTF-8\n"
|
"Content-Type: text/plain; charset=UTF-8\n"
|
||||||
"Content-Transfer-Encoding: 8bit\n"
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
"X-Launchpad-Export-Date: 2011-12-15 05:18+0000\n"
|
"X-Launchpad-Export-Date: 2012-04-13 05:26+0000\n"
|
||||||
"X-Generator: Launchpad (build 14487)\n"
|
"X-Generator: Launchpad (build 15070)\n"
|
||||||
"Language: ca\n"
|
"Language: ca\n"
|
||||||
|
|
||||||
#. name for aaa
|
#. name for aaa
|
||||||
@ -9584,31 +9584,31 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for hoi
|
#. name for hoi
|
||||||
msgid "Holikachuk"
|
msgid "Holikachuk"
|
||||||
msgstr ""
|
msgstr "Holikachuk"
|
||||||
|
|
||||||
#. name for hoj
|
#. name for hoj
|
||||||
msgid "Hadothi"
|
msgid "Hadothi"
|
||||||
msgstr ""
|
msgstr "Hadothi"
|
||||||
|
|
||||||
#. name for hol
|
#. name for hol
|
||||||
msgid "Holu"
|
msgid "Holu"
|
||||||
msgstr ""
|
msgstr "Holu"
|
||||||
|
|
||||||
#. name for hom
|
#. name for hom
|
||||||
msgid "Homa"
|
msgid "Homa"
|
||||||
msgstr ""
|
msgstr "Homa"
|
||||||
|
|
||||||
#. name for hoo
|
#. name for hoo
|
||||||
msgid "Holoholo"
|
msgid "Holoholo"
|
||||||
msgstr ""
|
msgstr "Holoholo"
|
||||||
|
|
||||||
#. name for hop
|
#. name for hop
|
||||||
msgid "Hopi"
|
msgid "Hopi"
|
||||||
msgstr ""
|
msgstr "Hopi"
|
||||||
|
|
||||||
#. name for hor
|
#. name for hor
|
||||||
msgid "Horo"
|
msgid "Horo"
|
||||||
msgstr ""
|
msgstr "Horo"
|
||||||
|
|
||||||
#. name for hos
|
#. name for hos
|
||||||
msgid "Ho Chi Minh City Sign Language"
|
msgid "Ho Chi Minh City Sign Language"
|
||||||
@ -9616,15 +9616,15 @@ msgstr "Llenguatge de signes de la ciutat de Ho Chi Minh"
|
|||||||
|
|
||||||
#. name for hot
|
#. name for hot
|
||||||
msgid "Hote"
|
msgid "Hote"
|
||||||
msgstr ""
|
msgstr "Hote"
|
||||||
|
|
||||||
#. name for hov
|
#. name for hov
|
||||||
msgid "Hovongan"
|
msgid "Hovongan"
|
||||||
msgstr ""
|
msgstr "Hovongan"
|
||||||
|
|
||||||
#. name for how
|
#. name for how
|
||||||
msgid "Honi"
|
msgid "Honi"
|
||||||
msgstr ""
|
msgstr "Honi"
|
||||||
|
|
||||||
#. name for hoy
|
#. name for hoy
|
||||||
msgid "Holiya"
|
msgid "Holiya"
|
||||||
@ -9636,7 +9636,7 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for hpo
|
#. name for hpo
|
||||||
msgid "Hpon"
|
msgid "Hpon"
|
||||||
msgstr ""
|
msgstr "Hpon"
|
||||||
|
|
||||||
#. name for hps
|
#. name for hps
|
||||||
msgid "Hawai'i Pidgin Sign Language"
|
msgid "Hawai'i Pidgin Sign Language"
|
||||||
@ -9644,35 +9644,35 @@ msgstr "Hawaià Pidgin; llenguatge de signes"
|
|||||||
|
|
||||||
#. name for hra
|
#. name for hra
|
||||||
msgid "Hrangkhol"
|
msgid "Hrangkhol"
|
||||||
msgstr ""
|
msgstr "Hrangkhol"
|
||||||
|
|
||||||
#. name for hre
|
#. name for hre
|
||||||
msgid "Hre"
|
msgid "Hre"
|
||||||
msgstr ""
|
msgstr "Hre"
|
||||||
|
|
||||||
#. name for hrk
|
#. name for hrk
|
||||||
msgid "Haruku"
|
msgid "Haruku"
|
||||||
msgstr ""
|
msgstr "Haruku"
|
||||||
|
|
||||||
#. name for hrm
|
#. name for hrm
|
||||||
msgid "Miao; Horned"
|
msgid "Miao; Horned"
|
||||||
msgstr ""
|
msgstr "Miao; Horned"
|
||||||
|
|
||||||
#. name for hro
|
#. name for hro
|
||||||
msgid "Haroi"
|
msgid "Haroi"
|
||||||
msgstr ""
|
msgstr "Haroi"
|
||||||
|
|
||||||
#. name for hrr
|
#. name for hrr
|
||||||
msgid "Horuru"
|
msgid "Horuru"
|
||||||
msgstr ""
|
msgstr "Horuru"
|
||||||
|
|
||||||
#. name for hrt
|
#. name for hrt
|
||||||
msgid "Hértevin"
|
msgid "Hértevin"
|
||||||
msgstr ""
|
msgstr "Hértevin"
|
||||||
|
|
||||||
#. name for hru
|
#. name for hru
|
||||||
msgid "Hruso"
|
msgid "Hruso"
|
||||||
msgstr ""
|
msgstr "Hruso"
|
||||||
|
|
||||||
#. name for hrv
|
#. name for hrv
|
||||||
msgid "Croatian"
|
msgid "Croatian"
|
||||||
@ -9680,11 +9680,11 @@ msgstr "Croat"
|
|||||||
|
|
||||||
#. name for hrx
|
#. name for hrx
|
||||||
msgid "Hunsrik"
|
msgid "Hunsrik"
|
||||||
msgstr ""
|
msgstr "Hunsrik"
|
||||||
|
|
||||||
#. name for hrz
|
#. name for hrz
|
||||||
msgid "Harzani"
|
msgid "Harzani"
|
||||||
msgstr ""
|
msgstr "Harzani"
|
||||||
|
|
||||||
#. name for hsb
|
#. name for hsb
|
||||||
msgid "Sorbian; Upper"
|
msgid "Sorbian; Upper"
|
||||||
@ -9704,31 +9704,31 @@ msgstr "Xinès; Xiang"
|
|||||||
|
|
||||||
#. name for hss
|
#. name for hss
|
||||||
msgid "Harsusi"
|
msgid "Harsusi"
|
||||||
msgstr ""
|
msgstr "Harsusi"
|
||||||
|
|
||||||
#. name for hti
|
#. name for hti
|
||||||
msgid "Hoti"
|
msgid "Hoti"
|
||||||
msgstr ""
|
msgstr "Hoti"
|
||||||
|
|
||||||
#. name for hto
|
#. name for hto
|
||||||
msgid "Huitoto; Minica"
|
msgid "Huitoto; Minica"
|
||||||
msgstr ""
|
msgstr "Huitoto; Minica"
|
||||||
|
|
||||||
#. name for hts
|
#. name for hts
|
||||||
msgid "Hadza"
|
msgid "Hadza"
|
||||||
msgstr ""
|
msgstr "Hadza"
|
||||||
|
|
||||||
#. name for htu
|
#. name for htu
|
||||||
msgid "Hitu"
|
msgid "Hitu"
|
||||||
msgstr ""
|
msgstr "Hitu"
|
||||||
|
|
||||||
#. name for htx
|
#. name for htx
|
||||||
msgid "Hittite; Middle"
|
msgid "Hittite; Middle"
|
||||||
msgstr ""
|
msgstr "Hittite; Middle"
|
||||||
|
|
||||||
#. name for hub
|
#. name for hub
|
||||||
msgid "Huambisa"
|
msgid "Huambisa"
|
||||||
msgstr ""
|
msgstr "Huambisa"
|
||||||
|
|
||||||
#. name for huc
|
#. name for huc
|
||||||
msgid "=/Hua"
|
msgid "=/Hua"
|
||||||
@ -9736,27 +9736,27 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for hud
|
#. name for hud
|
||||||
msgid "Huaulu"
|
msgid "Huaulu"
|
||||||
msgstr ""
|
msgstr "Huaulu"
|
||||||
|
|
||||||
#. name for hue
|
#. name for hue
|
||||||
msgid "Huave; San Francisco Del Mar"
|
msgid "Huave; San Francisco Del Mar"
|
||||||
msgstr ""
|
msgstr "Huave; San Francisco Del Mar"
|
||||||
|
|
||||||
#. name for huf
|
#. name for huf
|
||||||
msgid "Humene"
|
msgid "Humene"
|
||||||
msgstr ""
|
msgstr "Humene"
|
||||||
|
|
||||||
#. name for hug
|
#. name for hug
|
||||||
msgid "Huachipaeri"
|
msgid "Huachipaeri"
|
||||||
msgstr ""
|
msgstr "Huachipaeri"
|
||||||
|
|
||||||
#. name for huh
|
#. name for huh
|
||||||
msgid "Huilliche"
|
msgid "Huilliche"
|
||||||
msgstr ""
|
msgstr "Huilliche"
|
||||||
|
|
||||||
#. name for hui
|
#. name for hui
|
||||||
msgid "Huli"
|
msgid "Huli"
|
||||||
msgstr ""
|
msgstr "Huli"
|
||||||
|
|
||||||
#. name for huj
|
#. name for huj
|
||||||
msgid "Miao; Northern Guiyang"
|
msgid "Miao; Northern Guiyang"
|
||||||
@ -9764,15 +9764,15 @@ msgstr "Miao; Guiyang septentrional"
|
|||||||
|
|
||||||
#. name for huk
|
#. name for huk
|
||||||
msgid "Hulung"
|
msgid "Hulung"
|
||||||
msgstr ""
|
msgstr "Hulung"
|
||||||
|
|
||||||
#. name for hul
|
#. name for hul
|
||||||
msgid "Hula"
|
msgid "Hula"
|
||||||
msgstr ""
|
msgstr "Hula"
|
||||||
|
|
||||||
#. name for hum
|
#. name for hum
|
||||||
msgid "Hungana"
|
msgid "Hungana"
|
||||||
msgstr ""
|
msgstr "Hungana"
|
||||||
|
|
||||||
#. name for hun
|
#. name for hun
|
||||||
msgid "Hungarian"
|
msgid "Hungarian"
|
||||||
@ -9780,43 +9780,43 @@ msgstr "Hongarès"
|
|||||||
|
|
||||||
#. name for huo
|
#. name for huo
|
||||||
msgid "Hu"
|
msgid "Hu"
|
||||||
msgstr ""
|
msgstr "Hu"
|
||||||
|
|
||||||
#. name for hup
|
#. name for hup
|
||||||
msgid "Hupa"
|
msgid "Hupa"
|
||||||
msgstr ""
|
msgstr "Hupa"
|
||||||
|
|
||||||
#. name for huq
|
#. name for huq
|
||||||
msgid "Tsat"
|
msgid "Tsat"
|
||||||
msgstr ""
|
msgstr "Tsat"
|
||||||
|
|
||||||
#. name for hur
|
#. name for hur
|
||||||
msgid "Halkomelem"
|
msgid "Halkomelem"
|
||||||
msgstr ""
|
msgstr "Halkomelem"
|
||||||
|
|
||||||
#. name for hus
|
#. name for hus
|
||||||
msgid "Huastec"
|
msgid "Huastec"
|
||||||
msgstr ""
|
msgstr "Huastec"
|
||||||
|
|
||||||
#. name for hut
|
#. name for hut
|
||||||
msgid "Humla"
|
msgid "Humla"
|
||||||
msgstr ""
|
msgstr "Humla"
|
||||||
|
|
||||||
#. name for huu
|
#. name for huu
|
||||||
msgid "Huitoto; Murui"
|
msgid "Huitoto; Murui"
|
||||||
msgstr ""
|
msgstr "Huitoto; Murui"
|
||||||
|
|
||||||
#. name for huv
|
#. name for huv
|
||||||
msgid "Huave; San Mateo Del Mar"
|
msgid "Huave; San Mateo Del Mar"
|
||||||
msgstr ""
|
msgstr "Huave; San Mateo Del Mar"
|
||||||
|
|
||||||
#. name for huw
|
#. name for huw
|
||||||
msgid "Hukumina"
|
msgid "Hukumina"
|
||||||
msgstr ""
|
msgstr "Hukumina"
|
||||||
|
|
||||||
#. name for hux
|
#. name for hux
|
||||||
msgid "Huitoto; Nüpode"
|
msgid "Huitoto; Nüpode"
|
||||||
msgstr ""
|
msgstr "Huitoto; Nüpode"
|
||||||
|
|
||||||
#. name for huy
|
#. name for huy
|
||||||
msgid "Hulaulá"
|
msgid "Hulaulá"
|
||||||
|
@ -8,14 +8,14 @@ msgstr ""
|
|||||||
"Project-Id-Version: calibre\n"
|
"Project-Id-Version: calibre\n"
|
||||||
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
|
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
|
||||||
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
||||||
"PO-Revision-Date: 2012-03-11 10:13+0000\n"
|
"PO-Revision-Date: 2012-04-18 20:56+0000\n"
|
||||||
"Last-Translator: Jellby <Unknown>\n"
|
"Last-Translator: David de Obregon <Unknown>\n"
|
||||||
"Language-Team: Spanish <es@li.org>\n"
|
"Language-Team: Spanish <es@li.org>\n"
|
||||||
"MIME-Version: 1.0\n"
|
"MIME-Version: 1.0\n"
|
||||||
"Content-Type: text/plain; charset=UTF-8\n"
|
"Content-Type: text/plain; charset=UTF-8\n"
|
||||||
"Content-Transfer-Encoding: 8bit\n"
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
"X-Launchpad-Export-Date: 2012-03-12 04:38+0000\n"
|
"X-Launchpad-Export-Date: 2012-04-19 04:37+0000\n"
|
||||||
"X-Generator: Launchpad (build 14933)\n"
|
"X-Generator: Launchpad (build 15108)\n"
|
||||||
|
|
||||||
#. name for aaa
|
#. name for aaa
|
||||||
msgid "Ghotuo"
|
msgid "Ghotuo"
|
||||||
@ -4931,7 +4931,7 @@ msgstr "Como karim"
|
|||||||
|
|
||||||
#. name for cfm
|
#. name for cfm
|
||||||
msgid "Chin; Falam"
|
msgid "Chin; Falam"
|
||||||
msgstr ""
|
msgstr "Chin; Falam"
|
||||||
|
|
||||||
#. name for cga
|
#. name for cga
|
||||||
msgid "Changriwa"
|
msgid "Changriwa"
|
||||||
@ -5071,7 +5071,7 @@ msgstr "Chinali"
|
|||||||
|
|
||||||
#. name for cik
|
#. name for cik
|
||||||
msgid "Kinnauri; Chitkuli"
|
msgid "Kinnauri; Chitkuli"
|
||||||
msgstr ""
|
msgstr "Kinnauri; Chitkuli"
|
||||||
|
|
||||||
#. name for cim
|
#. name for cim
|
||||||
msgid "Cimbrian"
|
msgid "Cimbrian"
|
||||||
@ -5147,7 +5147,7 @@ msgstr "Chino jin"
|
|||||||
|
|
||||||
#. name for cka
|
#. name for cka
|
||||||
msgid "Chin; Khumi Awa"
|
msgid "Chin; Khumi Awa"
|
||||||
msgstr ""
|
msgstr "Chin; Khumi Awa"
|
||||||
|
|
||||||
#. name for ckb
|
#. name for ckb
|
||||||
msgid "Kurdish; Central"
|
msgid "Kurdish; Central"
|
||||||
@ -5287,7 +5287,7 @@ msgstr "Mnong central"
|
|||||||
|
|
||||||
#. name for cmr
|
#. name for cmr
|
||||||
msgid "Chin; Mro"
|
msgid "Chin; Mro"
|
||||||
msgstr ""
|
msgstr "Chin; Mro"
|
||||||
|
|
||||||
#. name for cms
|
#. name for cms
|
||||||
msgid "Messapic"
|
msgid "Messapic"
|
||||||
@ -5303,7 +5303,7 @@ msgstr "Changthang"
|
|||||||
|
|
||||||
#. name for cnb
|
#. name for cnb
|
||||||
msgid "Chin; Chinbon"
|
msgid "Chin; Chinbon"
|
||||||
msgstr ""
|
msgstr "Chin; Chinbon"
|
||||||
|
|
||||||
#. name for cnc
|
#. name for cnc
|
||||||
msgid "Côông"
|
msgid "Côông"
|
||||||
@ -5315,7 +5315,7 @@ msgstr "Qiang septentrional"
|
|||||||
|
|
||||||
#. name for cnh
|
#. name for cnh
|
||||||
msgid "Chin; Haka"
|
msgid "Chin; Haka"
|
||||||
msgstr ""
|
msgstr "Chin; Haka"
|
||||||
|
|
||||||
#. name for cni
|
#. name for cni
|
||||||
msgid "Asháninka"
|
msgid "Asháninka"
|
||||||
@ -5323,7 +5323,7 @@ msgstr "Asháninka"
|
|||||||
|
|
||||||
#. name for cnk
|
#. name for cnk
|
||||||
msgid "Chin; Khumi"
|
msgid "Chin; Khumi"
|
||||||
msgstr ""
|
msgstr "Chin; Khumi"
|
||||||
|
|
||||||
#. name for cnl
|
#. name for cnl
|
||||||
msgid "Chinantec; Lalana"
|
msgid "Chinantec; Lalana"
|
||||||
@ -5347,7 +5347,7 @@ msgstr "Chenoua"
|
|||||||
|
|
||||||
#. name for cnw
|
#. name for cnw
|
||||||
msgid "Chin; Ngawn"
|
msgid "Chin; Ngawn"
|
||||||
msgstr ""
|
msgstr "Chin; Ngawn"
|
||||||
|
|
||||||
#. name for cnx
|
#. name for cnx
|
||||||
msgid "Cornish; Middle"
|
msgid "Cornish; Middle"
|
||||||
@ -5459,7 +5459,7 @@ msgstr "Chinanteco de Palantla"
|
|||||||
|
|
||||||
#. name for cpb
|
#. name for cpb
|
||||||
msgid "Ashéninka; Ucayali-Yurúa"
|
msgid "Ashéninka; Ucayali-Yurúa"
|
||||||
msgstr ""
|
msgstr "Ashéninka; Ucayali-Yurúa"
|
||||||
|
|
||||||
#. name for cpc
|
#. name for cpc
|
||||||
msgid "Ajyíninka Apurucayali"
|
msgid "Ajyíninka Apurucayali"
|
||||||
@ -5483,7 +5483,7 @@ msgstr "Capiznon"
|
|||||||
|
|
||||||
#. name for cpu
|
#. name for cpu
|
||||||
msgid "Ashéninka; Pichis"
|
msgid "Ashéninka; Pichis"
|
||||||
msgstr ""
|
msgstr "Ashéninka; Pichis"
|
||||||
|
|
||||||
#. name for cpx
|
#. name for cpx
|
||||||
msgid "Chinese; Pu-Xian"
|
msgid "Chinese; Pu-Xian"
|
||||||
@ -5491,11 +5491,11 @@ msgstr "Chino puxian"
|
|||||||
|
|
||||||
#. name for cpy
|
#. name for cpy
|
||||||
msgid "Ashéninka; South Ucayali"
|
msgid "Ashéninka; South Ucayali"
|
||||||
msgstr ""
|
msgstr "Ashéninka; South Ucayali"
|
||||||
|
|
||||||
#. name for cqd
|
#. name for cqd
|
||||||
msgid "Miao; Chuanqiandian Cluster"
|
msgid "Miao; Chuanqiandian Cluster"
|
||||||
msgstr ""
|
msgstr "Miao; Chuanqiandian Cluster"
|
||||||
|
|
||||||
#. name for cqu
|
#. name for cqu
|
||||||
msgid "Quechua; Chilean"
|
msgid "Quechua; Chilean"
|
||||||
@ -5507,7 +5507,7 @@ msgstr "Chara"
|
|||||||
|
|
||||||
#. name for crb
|
#. name for crb
|
||||||
msgid "Carib; Island"
|
msgid "Carib; Island"
|
||||||
msgstr ""
|
msgstr "Carib; Island"
|
||||||
|
|
||||||
#. name for crc
|
#. name for crc
|
||||||
msgid "Lonwolwol"
|
msgid "Lonwolwol"
|
||||||
@ -5539,23 +5539,23 @@ msgstr "Forro"
|
|||||||
|
|
||||||
#. name for crj
|
#. name for crj
|
||||||
msgid "Cree; Southern East"
|
msgid "Cree; Southern East"
|
||||||
msgstr ""
|
msgstr "Cree; Southern East"
|
||||||
|
|
||||||
#. name for crk
|
#. name for crk
|
||||||
msgid "Cree; Plains"
|
msgid "Cree; Plains"
|
||||||
msgstr ""
|
msgstr "Cree; Plains"
|
||||||
|
|
||||||
#. name for crl
|
#. name for crl
|
||||||
msgid "Cree; Northern East"
|
msgid "Cree; Northern East"
|
||||||
msgstr ""
|
msgstr "Cree; Northern East"
|
||||||
|
|
||||||
#. name for crm
|
#. name for crm
|
||||||
msgid "Cree; Moose"
|
msgid "Cree; Moose"
|
||||||
msgstr ""
|
msgstr "Cree; Moose"
|
||||||
|
|
||||||
#. name for crn
|
#. name for crn
|
||||||
msgid "Cora; El Nayar"
|
msgid "Cora; El Nayar"
|
||||||
msgstr ""
|
msgstr "Cora; El Nayar"
|
||||||
|
|
||||||
#. name for cro
|
#. name for cro
|
||||||
msgid "Crow"
|
msgid "Crow"
|
||||||
@ -5563,11 +5563,11 @@ msgstr "Crow"
|
|||||||
|
|
||||||
#. name for crq
|
#. name for crq
|
||||||
msgid "Chorote; Iyo'wujwa"
|
msgid "Chorote; Iyo'wujwa"
|
||||||
msgstr ""
|
msgstr "Chorote; Iyo'wujwa"
|
||||||
|
|
||||||
#. name for crr
|
#. name for crr
|
||||||
msgid "Algonquian; Carolina"
|
msgid "Algonquian; Carolina"
|
||||||
msgstr ""
|
msgstr "Algonquian; Carolina"
|
||||||
|
|
||||||
#. name for crs
|
#. name for crs
|
||||||
msgid "Creole French; Seselwa"
|
msgid "Creole French; Seselwa"
|
||||||
@ -5575,7 +5575,7 @@ msgstr "Francés criollo seychellense"
|
|||||||
|
|
||||||
#. name for crt
|
#. name for crt
|
||||||
msgid "Chorote; Iyojwa'ja"
|
msgid "Chorote; Iyojwa'ja"
|
||||||
msgstr ""
|
msgstr "Chorote; Iyojwa'ja"
|
||||||
|
|
||||||
#. name for crv
|
#. name for crv
|
||||||
msgid "Chaura"
|
msgid "Chaura"
|
||||||
@ -5627,11 +5627,11 @@ msgstr "Lengua de signos chilena"
|
|||||||
|
|
||||||
#. name for csh
|
#. name for csh
|
||||||
msgid "Chin; Asho"
|
msgid "Chin; Asho"
|
||||||
msgstr ""
|
msgstr "Chin; Asho"
|
||||||
|
|
||||||
#. name for csi
|
#. name for csi
|
||||||
msgid "Miwok; Coast"
|
msgid "Miwok; Coast"
|
||||||
msgstr ""
|
msgstr "Miwok; Coast"
|
||||||
|
|
||||||
#. name for csk
|
#. name for csk
|
||||||
msgid "Jola-Kasa"
|
msgid "Jola-Kasa"
|
||||||
@ -5643,7 +5643,7 @@ msgstr "Lengua de signos china"
|
|||||||
|
|
||||||
#. name for csm
|
#. name for csm
|
||||||
msgid "Miwok; Central Sierra"
|
msgid "Miwok; Central Sierra"
|
||||||
msgstr ""
|
msgstr "Miwok; Central Sierra"
|
||||||
|
|
||||||
#. name for csn
|
#. name for csn
|
||||||
msgid "Colombian Sign Language"
|
msgid "Colombian Sign Language"
|
||||||
@ -5671,11 +5671,11 @@ msgstr "Ohlone septentrional"
|
|||||||
|
|
||||||
#. name for csw
|
#. name for csw
|
||||||
msgid "Cree; Swampy"
|
msgid "Cree; Swampy"
|
||||||
msgstr ""
|
msgstr "Cree; Swampy"
|
||||||
|
|
||||||
#. name for csy
|
#. name for csy
|
||||||
msgid "Chin; Siyin"
|
msgid "Chin; Siyin"
|
||||||
msgstr ""
|
msgstr "Chin; Siyin"
|
||||||
|
|
||||||
#. name for csz
|
#. name for csz
|
||||||
msgid "Coos"
|
msgid "Coos"
|
||||||
@ -5691,7 +5691,7 @@ msgstr "Chetco"
|
|||||||
|
|
||||||
#. name for ctd
|
#. name for ctd
|
||||||
msgid "Chin; Tedim"
|
msgid "Chin; Tedim"
|
||||||
msgstr ""
|
msgstr "Chin; Tedim"
|
||||||
|
|
||||||
#. name for cte
|
#. name for cte
|
||||||
msgid "Chinantec; Tepinapa"
|
msgid "Chinantec; Tepinapa"
|
||||||
@ -5727,7 +5727,7 @@ msgstr "Pandan"
|
|||||||
|
|
||||||
#. name for ctt
|
#. name for ctt
|
||||||
msgid "Chetti; Wayanad"
|
msgid "Chetti; Wayanad"
|
||||||
msgstr ""
|
msgstr "Chetti; Wayanad"
|
||||||
|
|
||||||
#. name for ctu
|
#. name for ctu
|
||||||
msgid "Chol"
|
msgid "Chol"
|
||||||
@ -5767,7 +5767,7 @@ msgstr "Mashco piro"
|
|||||||
|
|
||||||
#. name for cuk
|
#. name for cuk
|
||||||
msgid "Kuna; San Blas"
|
msgid "Kuna; San Blas"
|
||||||
msgstr ""
|
msgstr "Kuna; San Blas"
|
||||||
|
|
||||||
#. name for cul
|
#. name for cul
|
||||||
msgid "Culina"
|
msgid "Culina"
|
||||||
@ -5795,7 +5795,7 @@ msgstr "Chhulung"
|
|||||||
|
|
||||||
#. name for cut
|
#. name for cut
|
||||||
msgid "Cuicatec; Teutila"
|
msgid "Cuicatec; Teutila"
|
||||||
msgstr ""
|
msgstr "Cuicatec; Teutila"
|
||||||
|
|
||||||
#. name for cuu
|
#. name for cuu
|
||||||
msgid "Tai Ya"
|
msgid "Tai Ya"
|
||||||
@ -5811,7 +5811,7 @@ msgstr "Chukwa"
|
|||||||
|
|
||||||
#. name for cux
|
#. name for cux
|
||||||
msgid "Cuicatec; Tepeuxila"
|
msgid "Cuicatec; Tepeuxila"
|
||||||
msgstr ""
|
msgstr "Cuicatec; Tepeuxila"
|
||||||
|
|
||||||
#. name for cvg
|
#. name for cvg
|
||||||
msgid "Chug"
|
msgid "Chug"
|
||||||
@ -5831,7 +5831,7 @@ msgstr "Maindo"
|
|||||||
|
|
||||||
#. name for cwd
|
#. name for cwd
|
||||||
msgid "Cree; Woods"
|
msgid "Cree; Woods"
|
||||||
msgstr ""
|
msgstr "Cree; Woods"
|
||||||
|
|
||||||
#. name for cwe
|
#. name for cwe
|
||||||
msgid "Kwere"
|
msgid "Kwere"
|
||||||
@ -5879,7 +5879,7 @@ msgstr "Chino minzhong"
|
|||||||
|
|
||||||
#. name for czt
|
#. name for czt
|
||||||
msgid "Chin; Zotung"
|
msgid "Chin; Zotung"
|
||||||
msgstr ""
|
msgstr "Chin; Zotung"
|
||||||
|
|
||||||
#. name for daa
|
#. name for daa
|
||||||
msgid "Dangaléat"
|
msgid "Dangaléat"
|
||||||
@ -5935,7 +5935,7 @@ msgstr "Danés"
|
|||||||
|
|
||||||
#. name for dao
|
#. name for dao
|
||||||
msgid "Chin; Daai"
|
msgid "Chin; Daai"
|
||||||
msgstr ""
|
msgstr "Chin; Daai"
|
||||||
|
|
||||||
#. name for dap
|
#. name for dap
|
||||||
msgid "Nisi (India)"
|
msgid "Nisi (India)"
|
||||||
@ -5943,7 +5943,7 @@ msgstr "Nisi (India)"
|
|||||||
|
|
||||||
#. name for daq
|
#. name for daq
|
||||||
msgid "Maria; Dandami"
|
msgid "Maria; Dandami"
|
||||||
msgstr ""
|
msgstr "Maria; Dandami"
|
||||||
|
|
||||||
#. name for dar
|
#. name for dar
|
||||||
msgid "Dargwa"
|
msgid "Dargwa"
|
||||||
@ -5995,7 +5995,7 @@ msgstr "Edopi"
|
|||||||
|
|
||||||
#. name for dbg
|
#. name for dbg
|
||||||
msgid "Dogon; Dogul Dom"
|
msgid "Dogon; Dogul Dom"
|
||||||
msgstr ""
|
msgstr "Dogon; Dogul Dom"
|
||||||
|
|
||||||
#. name for dbi
|
#. name for dbi
|
||||||
msgid "Doka"
|
msgid "Doka"
|
||||||
@ -6035,7 +6035,7 @@ msgstr "Dabarre"
|
|||||||
|
|
||||||
#. name for dbu
|
#. name for dbu
|
||||||
msgid "Dogon; Bondum Dom"
|
msgid "Dogon; Bondum Dom"
|
||||||
msgstr ""
|
msgstr "Dogon; Bondum Dom"
|
||||||
|
|
||||||
#. name for dbv
|
#. name for dbv
|
||||||
msgid "Dungu"
|
msgid "Dungu"
|
||||||
@ -6067,7 +6067,7 @@ msgstr "Fataluku"
|
|||||||
|
|
||||||
#. name for ddi
|
#. name for ddi
|
||||||
msgid "Goodenough; West"
|
msgid "Goodenough; West"
|
||||||
msgstr ""
|
msgstr "Goodenough; West"
|
||||||
|
|
||||||
#. name for ddj
|
#. name for ddj
|
||||||
msgid "Jaru"
|
msgid "Jaru"
|
||||||
@ -6083,7 +6083,7 @@ msgstr "Dido"
|
|||||||
|
|
||||||
#. name for dds
|
#. name for dds
|
||||||
msgid "Dogon; Donno So"
|
msgid "Dogon; Donno So"
|
||||||
msgstr ""
|
msgstr "Dogon; Donno So"
|
||||||
|
|
||||||
#. name for ddw
|
#. name for ddw
|
||||||
msgid "Dawera-Daweloor"
|
msgid "Dawera-Daweloor"
|
||||||
@ -6135,7 +6135,7 @@ msgstr "Slave (atabascano)"
|
|||||||
|
|
||||||
#. name for dep
|
#. name for dep
|
||||||
msgid "Delaware; Pidgin"
|
msgid "Delaware; Pidgin"
|
||||||
msgstr ""
|
msgstr "Delaware; Pidgin"
|
||||||
|
|
||||||
#. name for deq
|
#. name for deq
|
||||||
msgid "Dendi (Central African Republic)"
|
msgid "Dendi (Central African Republic)"
|
||||||
@ -6167,11 +6167,11 @@ msgstr "Dagaare meridional"
|
|||||||
|
|
||||||
#. name for dgb
|
#. name for dgb
|
||||||
msgid "Dogon; Bunoge"
|
msgid "Dogon; Bunoge"
|
||||||
msgstr ""
|
msgstr "Dogon; Bunoge"
|
||||||
|
|
||||||
#. name for dgc
|
#. name for dgc
|
||||||
msgid "Agta; Casiguran Dumagat"
|
msgid "Agta; Casiguran Dumagat"
|
||||||
msgstr ""
|
msgstr "Agta; Casiguran Dumagat"
|
||||||
|
|
||||||
#. name for dgd
|
#. name for dgd
|
||||||
msgid "Dagaari Dioula"
|
msgid "Dagaari Dioula"
|
||||||
@ -6283,7 +6283,7 @@ msgstr "Dinka centromeridional"
|
|||||||
|
|
||||||
#. name for dic
|
#. name for dic
|
||||||
msgid "Dida; Lakota"
|
msgid "Dida; Lakota"
|
||||||
msgstr ""
|
msgstr "Dida; Lakota"
|
||||||
|
|
||||||
#. name for did
|
#. name for did
|
||||||
msgid "Didinga"
|
msgid "Didinga"
|
||||||
@ -6411,7 +6411,7 @@ msgstr "Djiwarli"
|
|||||||
|
|
||||||
#. name for djm
|
#. name for djm
|
||||||
msgid "Dogon; Jamsay"
|
msgid "Dogon; Jamsay"
|
||||||
msgstr ""
|
msgstr "Dogon; Jamsay"
|
||||||
|
|
||||||
#. name for djn
|
#. name for djn
|
||||||
msgid "Djauan"
|
msgid "Djauan"
|
||||||
@ -6471,7 +6471,7 @@ msgstr "Duma"
|
|||||||
|
|
||||||
#. name for dmb
|
#. name for dmb
|
||||||
msgid "Dogon; Mombo"
|
msgid "Dogon; Mombo"
|
||||||
msgstr ""
|
msgstr "Dogon; Mombo"
|
||||||
|
|
||||||
#. name for dmc
|
#. name for dmc
|
||||||
msgid "Dimir"
|
msgid "Dimir"
|
||||||
@ -6483,7 +6483,7 @@ msgstr "Dugwor"
|
|||||||
|
|
||||||
#. name for dmg
|
#. name for dmg
|
||||||
msgid "Kinabatangan; Upper"
|
msgid "Kinabatangan; Upper"
|
||||||
msgstr ""
|
msgstr "Kinabatangan; Upper"
|
||||||
|
|
||||||
#. name for dmk
|
#. name for dmk
|
||||||
msgid "Domaaki"
|
msgid "Domaaki"
|
||||||
@ -6503,7 +6503,7 @@ msgstr "Kemezung"
|
|||||||
|
|
||||||
#. name for dmr
|
#. name for dmr
|
||||||
msgid "Damar; East"
|
msgid "Damar; East"
|
||||||
msgstr ""
|
msgstr "Damar; East"
|
||||||
|
|
||||||
#. name for dms
|
#. name for dms
|
||||||
msgid "Dampelas"
|
msgid "Dampelas"
|
||||||
@ -6527,7 +6527,7 @@ msgstr "Demta"
|
|||||||
|
|
||||||
#. name for dna
|
#. name for dna
|
||||||
msgid "Dani; Upper Grand Valley"
|
msgid "Dani; Upper Grand Valley"
|
||||||
msgstr ""
|
msgstr "Dani; Upper Grand Valley"
|
||||||
|
|
||||||
#. name for dnd
|
#. name for dnd
|
||||||
msgid "Daonda"
|
msgid "Daonda"
|
||||||
@ -6543,7 +6543,7 @@ msgstr "Dungan"
|
|||||||
|
|
||||||
#. name for dni
|
#. name for dni
|
||||||
msgid "Dani; Lower Grand Valley"
|
msgid "Dani; Lower Grand Valley"
|
||||||
msgstr ""
|
msgstr "Dani; Lower Grand Valley"
|
||||||
|
|
||||||
#. name for dnk
|
#. name for dnk
|
||||||
msgid "Dengka"
|
msgid "Dengka"
|
||||||
@ -6559,7 +6559,7 @@ msgstr "Danaru"
|
|||||||
|
|
||||||
#. name for dnt
|
#. name for dnt
|
||||||
msgid "Dani; Mid Grand Valley"
|
msgid "Dani; Mid Grand Valley"
|
||||||
msgstr ""
|
msgstr "Dani; Mid Grand Valley"
|
||||||
|
|
||||||
#. name for dnu
|
#. name for dnu
|
||||||
msgid "Danau"
|
msgid "Danau"
|
||||||
@ -6695,7 +6695,7 @@ msgstr "Damar occidental"
|
|||||||
|
|
||||||
#. name for dro
|
#. name for dro
|
||||||
msgid "Melanau; Daro-Matu"
|
msgid "Melanau; Daro-Matu"
|
||||||
msgstr ""
|
msgstr "Melanau; Daro-Matu"
|
||||||
|
|
||||||
#. name for drq
|
#. name for drq
|
||||||
msgid "Dura"
|
msgid "Dura"
|
||||||
@ -6723,7 +6723,7 @@ msgstr "Darai"
|
|||||||
|
|
||||||
#. name for dsb
|
#. name for dsb
|
||||||
msgid "Sorbian; Lower"
|
msgid "Sorbian; Lower"
|
||||||
msgstr ""
|
msgstr "Sorbian; Lower"
|
||||||
|
|
||||||
#. name for dse
|
#. name for dse
|
||||||
msgid "Dutch Sign Language"
|
msgid "Dutch Sign Language"
|
||||||
@ -6759,7 +6759,7 @@ msgstr "Daur"
|
|||||||
|
|
||||||
#. name for dtb
|
#. name for dtb
|
||||||
msgid "Kadazan; Labuk-Kinabatangan"
|
msgid "Kadazan; Labuk-Kinabatangan"
|
||||||
msgstr ""
|
msgstr "Kadazan; Labuk-Kinabatangan"
|
||||||
|
|
||||||
#. name for dtd
|
#. name for dtd
|
||||||
msgid "Ditidaht"
|
msgid "Ditidaht"
|
||||||
@ -6767,15 +6767,15 @@ msgstr "Ditidaht"
|
|||||||
|
|
||||||
#. name for dti
|
#. name for dti
|
||||||
msgid "Dogon; Ana Tinga"
|
msgid "Dogon; Ana Tinga"
|
||||||
msgstr ""
|
msgstr "Dogon; Ana Tinga"
|
||||||
|
|
||||||
#. name for dtk
|
#. name for dtk
|
||||||
msgid "Dogon; Tene Kan"
|
msgid "Dogon; Tene Kan"
|
||||||
msgstr ""
|
msgstr "Dogon; Tene Kan"
|
||||||
|
|
||||||
#. name for dtm
|
#. name for dtm
|
||||||
msgid "Dogon; Tomo Kan"
|
msgid "Dogon; Tomo Kan"
|
||||||
msgstr ""
|
msgstr "Dogon; Tomo Kan"
|
||||||
|
|
||||||
#. name for dtp
|
#. name for dtp
|
||||||
msgid "Dusun; Central"
|
msgid "Dusun; Central"
|
||||||
@ -6787,15 +6787,15 @@ msgstr "Lotud"
|
|||||||
|
|
||||||
#. name for dts
|
#. name for dts
|
||||||
msgid "Dogon; Toro So"
|
msgid "Dogon; Toro So"
|
||||||
msgstr ""
|
msgstr "Dogon; Toro So"
|
||||||
|
|
||||||
#. name for dtt
|
#. name for dtt
|
||||||
msgid "Dogon; Toro Tegu"
|
msgid "Dogon; Toro Tegu"
|
||||||
msgstr ""
|
msgstr "Dogon; Toro Tegu"
|
||||||
|
|
||||||
#. name for dtu
|
#. name for dtu
|
||||||
msgid "Dogon; Tebul Ure"
|
msgid "Dogon; Tebul Ure"
|
||||||
msgstr ""
|
msgstr "Dogon; Tebul Ure"
|
||||||
|
|
||||||
#. name for dua
|
#. name for dua
|
||||||
msgid "Duala"
|
msgid "Duala"
|
||||||
@ -6815,7 +6815,7 @@ msgstr "Hun-saare"
|
|||||||
|
|
||||||
#. name for due
|
#. name for due
|
||||||
msgid "Agta; Umiray Dumaget"
|
msgid "Agta; Umiray Dumaget"
|
||||||
msgstr ""
|
msgstr "Agta; Umiray Dumaget"
|
||||||
|
|
||||||
#. name for duf
|
#. name for duf
|
||||||
msgid "Dumbea"
|
msgid "Dumbea"
|
||||||
@ -6843,7 +6843,7 @@ msgstr "Uyajitaya"
|
|||||||
|
|
||||||
#. name for dul
|
#. name for dul
|
||||||
msgid "Agta; Alabat Island"
|
msgid "Agta; Alabat Island"
|
||||||
msgstr ""
|
msgstr "Agta; Alabat Island"
|
||||||
|
|
||||||
#. name for dum
|
#. name for dum
|
||||||
msgid "Dutch; Middle (ca. 1050-1350)"
|
msgid "Dutch; Middle (ca. 1050-1350)"
|
||||||
@ -6855,7 +6855,7 @@ msgstr "Dusun deyah"
|
|||||||
|
|
||||||
#. name for duo
|
#. name for duo
|
||||||
msgid "Agta; Dupaninan"
|
msgid "Agta; Dupaninan"
|
||||||
msgstr ""
|
msgstr "Agta; Dupaninan"
|
||||||
|
|
||||||
#. name for dup
|
#. name for dup
|
||||||
msgid "Duano"
|
msgid "Duano"
|
||||||
@ -6891,7 +6891,7 @@ msgstr "Duungooma"
|
|||||||
|
|
||||||
#. name for duy
|
#. name for duy
|
||||||
msgid "Agta; Dicamay"
|
msgid "Agta; Dicamay"
|
||||||
msgstr ""
|
msgstr "Agta; Dicamay"
|
||||||
|
|
||||||
#. name for duz
|
#. name for duz
|
||||||
msgid "Duli"
|
msgid "Duli"
|
||||||
@ -6907,7 +6907,7 @@ msgstr "Diri"
|
|||||||
|
|
||||||
#. name for dwl
|
#. name for dwl
|
||||||
msgid "Dogon; Walo Kumbe"
|
msgid "Dogon; Walo Kumbe"
|
||||||
msgstr ""
|
msgstr "Dogon; Walo Kumbe"
|
||||||
|
|
||||||
#. name for dwr
|
#. name for dwr
|
||||||
msgid "Dawro"
|
msgid "Dawro"
|
||||||
@ -6935,15 +6935,15 @@ msgstr "Dyugun"
|
|||||||
|
|
||||||
#. name for dyg
|
#. name for dyg
|
||||||
msgid "Agta; Villa Viciosa"
|
msgid "Agta; Villa Viciosa"
|
||||||
msgstr ""
|
msgstr "Agta; Villa Viciosa"
|
||||||
|
|
||||||
#. name for dyi
|
#. name for dyi
|
||||||
msgid "Senoufo; Djimini"
|
msgid "Senoufo; Djimini"
|
||||||
msgstr ""
|
msgstr "Senoufo; Djimini"
|
||||||
|
|
||||||
#. name for dym
|
#. name for dym
|
||||||
msgid "Dogon; Yanda Dom"
|
msgid "Dogon; Yanda Dom"
|
||||||
msgstr ""
|
msgstr "Dogon; Yanda Dom"
|
||||||
|
|
||||||
#. name for dyn
|
#. name for dyn
|
||||||
msgid "Dyangadi"
|
msgid "Dyangadi"
|
||||||
@ -7095,19 +7095,19 @@ msgstr "Kol"
|
|||||||
|
|
||||||
#. name for ekm
|
#. name for ekm
|
||||||
msgid "Elip"
|
msgid "Elip"
|
||||||
msgstr ""
|
msgstr "Elip"
|
||||||
|
|
||||||
#. name for eko
|
#. name for eko
|
||||||
msgid "Koti"
|
msgid "Koti"
|
||||||
msgstr ""
|
msgstr "Koti"
|
||||||
|
|
||||||
#. name for ekp
|
#. name for ekp
|
||||||
msgid "Ekpeye"
|
msgid "Ekpeye"
|
||||||
msgstr ""
|
msgstr "Ekpeye"
|
||||||
|
|
||||||
#. name for ekr
|
#. name for ekr
|
||||||
msgid "Yace"
|
msgid "Yace"
|
||||||
msgstr ""
|
msgstr "Yace"
|
||||||
|
|
||||||
#. name for eky
|
#. name for eky
|
||||||
msgid "Kayah; Eastern"
|
msgid "Kayah; Eastern"
|
||||||
@ -7115,19 +7115,19 @@ msgstr "Kayah oriental"
|
|||||||
|
|
||||||
#. name for ele
|
#. name for ele
|
||||||
msgid "Elepi"
|
msgid "Elepi"
|
||||||
msgstr ""
|
msgstr "Elepi"
|
||||||
|
|
||||||
#. name for elh
|
#. name for elh
|
||||||
msgid "El Hugeirat"
|
msgid "El Hugeirat"
|
||||||
msgstr ""
|
msgstr "El Hugeirat"
|
||||||
|
|
||||||
#. name for eli
|
#. name for eli
|
||||||
msgid "Nding"
|
msgid "Nding"
|
||||||
msgstr ""
|
msgstr "Nding"
|
||||||
|
|
||||||
#. name for elk
|
#. name for elk
|
||||||
msgid "Elkei"
|
msgid "Elkei"
|
||||||
msgstr ""
|
msgstr "Elkei"
|
||||||
|
|
||||||
#. name for ell
|
#. name for ell
|
||||||
msgid "Greek; Modern (1453-)"
|
msgid "Greek; Modern (1453-)"
|
||||||
@ -7135,19 +7135,19 @@ msgstr "Griego moderno (1453-)"
|
|||||||
|
|
||||||
#. name for elm
|
#. name for elm
|
||||||
msgid "Eleme"
|
msgid "Eleme"
|
||||||
msgstr ""
|
msgstr "Eleme"
|
||||||
|
|
||||||
#. name for elo
|
#. name for elo
|
||||||
msgid "El Molo"
|
msgid "El Molo"
|
||||||
msgstr ""
|
msgstr "El Molo"
|
||||||
|
|
||||||
#. name for elp
|
#. name for elp
|
||||||
msgid "Elpaputih"
|
msgid "Elpaputih"
|
||||||
msgstr ""
|
msgstr "Elpaputih"
|
||||||
|
|
||||||
#. name for elu
|
#. name for elu
|
||||||
msgid "Elu"
|
msgid "Elu"
|
||||||
msgstr ""
|
msgstr "Elu"
|
||||||
|
|
||||||
#. name for elx
|
#. name for elx
|
||||||
msgid "Elamite"
|
msgid "Elamite"
|
||||||
@ -7155,15 +7155,15 @@ msgstr "Elamita"
|
|||||||
|
|
||||||
#. name for ema
|
#. name for ema
|
||||||
msgid "Emai-Iuleha-Ora"
|
msgid "Emai-Iuleha-Ora"
|
||||||
msgstr ""
|
msgstr "Emai-Iuleha-Ora"
|
||||||
|
|
||||||
#. name for emb
|
#. name for emb
|
||||||
msgid "Embaloh"
|
msgid "Embaloh"
|
||||||
msgstr ""
|
msgstr "Embaloh"
|
||||||
|
|
||||||
#. name for eme
|
#. name for eme
|
||||||
msgid "Emerillon"
|
msgid "Emerillon"
|
||||||
msgstr ""
|
msgstr "Emerillon"
|
||||||
|
|
||||||
#. name for emg
|
#. name for emg
|
||||||
msgid "Meohang; Eastern"
|
msgid "Meohang; Eastern"
|
||||||
@ -7171,7 +7171,7 @@ msgstr "Meohang oriental"
|
|||||||
|
|
||||||
#. name for emi
|
#. name for emi
|
||||||
msgid "Mussau-Emira"
|
msgid "Mussau-Emira"
|
||||||
msgstr ""
|
msgstr "Mussau-Emira"
|
||||||
|
|
||||||
#. name for emk
|
#. name for emk
|
||||||
msgid "Maninkakan; Eastern"
|
msgid "Maninkakan; Eastern"
|
||||||
@ -7179,15 +7179,15 @@ msgstr "Maninkakan oriental"
|
|||||||
|
|
||||||
#. name for emm
|
#. name for emm
|
||||||
msgid "Mamulique"
|
msgid "Mamulique"
|
||||||
msgstr ""
|
msgstr "Mamulique"
|
||||||
|
|
||||||
#. name for emn
|
#. name for emn
|
||||||
msgid "Eman"
|
msgid "Eman"
|
||||||
msgstr ""
|
msgstr "Eman"
|
||||||
|
|
||||||
#. name for emo
|
#. name for emo
|
||||||
msgid "Emok"
|
msgid "Emok"
|
||||||
msgstr ""
|
msgstr "Emok"
|
||||||
|
|
||||||
#. name for emp
|
#. name for emp
|
||||||
msgid "Emberá; Northern"
|
msgid "Emberá; Northern"
|
||||||
@ -7203,11 +7203,11 @@ msgstr "Muria oriental"
|
|||||||
|
|
||||||
#. name for emw
|
#. name for emw
|
||||||
msgid "Emplawas"
|
msgid "Emplawas"
|
||||||
msgstr ""
|
msgstr "Emplawas"
|
||||||
|
|
||||||
#. name for emx
|
#. name for emx
|
||||||
msgid "Erromintxela"
|
msgid "Erromintxela"
|
||||||
msgstr ""
|
msgstr "Erromintxela"
|
||||||
|
|
||||||
#. name for emy
|
#. name for emy
|
||||||
msgid "Mayan; Epigraphic"
|
msgid "Mayan; Epigraphic"
|
||||||
|
@ -9,14 +9,14 @@ msgstr ""
|
|||||||
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
||||||
"devel@lists.alioth.debian.org>\n"
|
"devel@lists.alioth.debian.org>\n"
|
||||||
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
||||||
"PO-Revision-Date: 2012-03-06 13:55+0000\n"
|
"PO-Revision-Date: 2012-04-18 13:08+0000\n"
|
||||||
"Last-Translator: Asier Iturralde Sarasola <Unknown>\n"
|
"Last-Translator: Asier Iturralde Sarasola <Unknown>\n"
|
||||||
"Language-Team: Euskara <itzulpena@comtropos.com>\n"
|
"Language-Team: Euskara <itzulpena@comtropos.com>\n"
|
||||||
"MIME-Version: 1.0\n"
|
"MIME-Version: 1.0\n"
|
||||||
"Content-Type: text/plain; charset=UTF-8\n"
|
"Content-Type: text/plain; charset=UTF-8\n"
|
||||||
"Content-Transfer-Encoding: 8bit\n"
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
"X-Launchpad-Export-Date: 2012-03-07 05:12+0000\n"
|
"X-Launchpad-Export-Date: 2012-04-19 04:36+0000\n"
|
||||||
"X-Generator: Launchpad (build 14907)\n"
|
"X-Generator: Launchpad (build 15108)\n"
|
||||||
"Language: eu\n"
|
"Language: eu\n"
|
||||||
|
|
||||||
#. name for aaa
|
#. name for aaa
|
||||||
@ -27125,7 +27125,7 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for vie
|
#. name for vie
|
||||||
msgid "Vietnamese"
|
msgid "Vietnamese"
|
||||||
msgstr "Mahastiak"
|
msgstr "Vietnamera"
|
||||||
|
|
||||||
#. name for vif
|
#. name for vif
|
||||||
msgid "Vili"
|
msgid "Vili"
|
||||||
|
@ -12,14 +12,14 @@ msgstr ""
|
|||||||
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
||||||
"devel@lists.alioth.debian.org>\n"
|
"devel@lists.alioth.debian.org>\n"
|
||||||
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
||||||
"PO-Revision-Date: 2011-09-27 16:34+0000\n"
|
"PO-Revision-Date: 2012-04-03 11:51+0000\n"
|
||||||
"Last-Translator: Kovid Goyal <Unknown>\n"
|
"Last-Translator: Antón Méixome <meixome@gmail.com>\n"
|
||||||
"Language-Team: Galician <proxecto@trasno.net>\n"
|
"Language-Team: Galician <proxecto@trasno.net>\n"
|
||||||
"MIME-Version: 1.0\n"
|
"MIME-Version: 1.0\n"
|
||||||
"Content-Type: text/plain; charset=UTF-8\n"
|
"Content-Type: text/plain; charset=UTF-8\n"
|
||||||
"Content-Transfer-Encoding: 8bit\n"
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
"X-Launchpad-Export-Date: 2011-11-26 05:16+0000\n"
|
"X-Launchpad-Export-Date: 2012-04-04 04:39+0000\n"
|
||||||
"X-Generator: Launchpad (build 14381)\n"
|
"X-Generator: Launchpad (build 15055)\n"
|
||||||
"Language: gl\n"
|
"Language: gl\n"
|
||||||
|
|
||||||
#. name for aaa
|
#. name for aaa
|
||||||
@ -1324,7 +1324,7 @@ msgstr "Apinayé"
|
|||||||
|
|
||||||
#. name for apo
|
#. name for apo
|
||||||
msgid "Ambul"
|
msgid "Ambul"
|
||||||
msgstr ""
|
msgstr "Ambul"
|
||||||
|
|
||||||
#. name for app
|
#. name for app
|
||||||
msgid "Apma"
|
msgid "Apma"
|
||||||
@ -1376,7 +1376,7 @@ msgstr "Archi"
|
|||||||
|
|
||||||
#. name for aqd
|
#. name for aqd
|
||||||
msgid "Dogon; Ampari"
|
msgid "Dogon; Ampari"
|
||||||
msgstr ""
|
msgstr "Dogon; Ampari"
|
||||||
|
|
||||||
#. name for aqg
|
#. name for aqg
|
||||||
msgid "Arigidi"
|
msgid "Arigidi"
|
||||||
@ -3120,7 +3120,7 @@ msgstr "Bekwarra"
|
|||||||
|
|
||||||
#. name for bkw
|
#. name for bkw
|
||||||
msgid "Bekwel"
|
msgid "Bekwel"
|
||||||
msgstr ""
|
msgstr "Bekwel"
|
||||||
|
|
||||||
#. name for bkx
|
#. name for bkx
|
||||||
msgid "Baikeno"
|
msgid "Baikeno"
|
||||||
@ -3316,7 +3316,7 @@ msgstr "Biao Mon"
|
|||||||
|
|
||||||
#. name for bmu
|
#. name for bmu
|
||||||
msgid "Somba-Siawari"
|
msgid "Somba-Siawari"
|
||||||
msgstr ""
|
msgstr "Somba-Siawari"
|
||||||
|
|
||||||
#. name for bmv
|
#. name for bmv
|
||||||
msgid "Bum"
|
msgid "Bum"
|
||||||
@ -4608,7 +4608,7 @@ msgstr "Basa (Nixeria)"
|
|||||||
|
|
||||||
#. name for bzx
|
#. name for bzx
|
||||||
msgid "Bozo; Kɛlɛngaxo"
|
msgid "Bozo; Kɛlɛngaxo"
|
||||||
msgstr ""
|
msgstr "Bozo; Kelengaxo"
|
||||||
|
|
||||||
#. name for bzy
|
#. name for bzy
|
||||||
msgid "Obanliku"
|
msgid "Obanliku"
|
||||||
@ -6476,7 +6476,7 @@ msgstr "Duma"
|
|||||||
|
|
||||||
#. name for dmb
|
#. name for dmb
|
||||||
msgid "Dogon; Mombo"
|
msgid "Dogon; Mombo"
|
||||||
msgstr ""
|
msgstr "Dogon; Mombo"
|
||||||
|
|
||||||
#. name for dmc
|
#. name for dmc
|
||||||
msgid "Dimir"
|
msgid "Dimir"
|
||||||
@ -6672,7 +6672,7 @@ msgstr "Dair"
|
|||||||
|
|
||||||
#. name for drc
|
#. name for drc
|
||||||
msgid "Minderico"
|
msgid "Minderico"
|
||||||
msgstr ""
|
msgstr "Minderico"
|
||||||
|
|
||||||
#. name for drd
|
#. name for drd
|
||||||
msgid "Darmiya"
|
msgid "Darmiya"
|
||||||
@ -6768,7 +6768,7 @@ msgstr "Kadazan; Labuk-Kinabatangan"
|
|||||||
|
|
||||||
#. name for dtd
|
#. name for dtd
|
||||||
msgid "Ditidaht"
|
msgid "Ditidaht"
|
||||||
msgstr ""
|
msgstr "Ditidaht"
|
||||||
|
|
||||||
#. name for dti
|
#. name for dti
|
||||||
msgid "Dogon; Ana Tinga"
|
msgid "Dogon; Ana Tinga"
|
||||||
@ -6844,7 +6844,7 @@ msgstr "Dhuwal"
|
|||||||
|
|
||||||
#. name for duk
|
#. name for duk
|
||||||
msgid "Uyajitaya"
|
msgid "Uyajitaya"
|
||||||
msgstr ""
|
msgstr "Uyajitaya"
|
||||||
|
|
||||||
#. name for dul
|
#. name for dul
|
||||||
msgid "Agta; Alabat Island"
|
msgid "Agta; Alabat Island"
|
||||||
@ -8168,7 +8168,7 @@ msgstr "Yiwom"
|
|||||||
|
|
||||||
#. name for gel
|
#. name for gel
|
||||||
msgid "ut-Ma'in"
|
msgid "ut-Ma'in"
|
||||||
msgstr ""
|
msgstr "ut-Ma'in"
|
||||||
|
|
||||||
#. name for geq
|
#. name for geq
|
||||||
msgid "Geme"
|
msgid "Geme"
|
||||||
@ -12508,7 +12508,7 @@ msgstr "Konzo"
|
|||||||
|
|
||||||
#. name for kop
|
#. name for kop
|
||||||
msgid "Waube"
|
msgid "Waube"
|
||||||
msgstr ""
|
msgstr "Waube"
|
||||||
|
|
||||||
#. name for koq
|
#. name for koq
|
||||||
msgid "Kota (Gabon)"
|
msgid "Kota (Gabon)"
|
||||||
@ -16732,7 +16732,7 @@ msgstr "Elseng"
|
|||||||
|
|
||||||
#. name for mrg
|
#. name for mrg
|
||||||
msgid "Mising"
|
msgid "Mising"
|
||||||
msgstr ""
|
msgstr "Mising"
|
||||||
|
|
||||||
#. name for mrh
|
#. name for mrh
|
||||||
msgid "Chin; Mara"
|
msgid "Chin; Mara"
|
||||||
@ -17956,7 +17956,7 @@ msgstr "Ndoola"
|
|||||||
|
|
||||||
#. name for nds
|
#. name for nds
|
||||||
msgid "German; Low"
|
msgid "German; Low"
|
||||||
msgstr ""
|
msgstr "Baixo alemán"
|
||||||
|
|
||||||
#. name for ndt
|
#. name for ndt
|
||||||
msgid "Ndunga"
|
msgid "Ndunga"
|
||||||
@ -18004,7 +18004,7 @@ msgstr "Nde-Gbite"
|
|||||||
|
|
||||||
#. name for nee
|
#. name for nee
|
||||||
msgid "Nêlêmwa-Nixumwak"
|
msgid "Nêlêmwa-Nixumwak"
|
||||||
msgstr ""
|
msgstr "Nêlêmwa-Nixumwak"
|
||||||
|
|
||||||
#. name for nef
|
#. name for nef
|
||||||
msgid "Nefamese"
|
msgid "Nefamese"
|
||||||
@ -18300,7 +18300,7 @@ msgstr "Nias"
|
|||||||
|
|
||||||
#. name for nib
|
#. name for nib
|
||||||
msgid "Nakame"
|
msgid "Nakame"
|
||||||
msgstr ""
|
msgstr "Nakame"
|
||||||
|
|
||||||
#. name for nid
|
#. name for nid
|
||||||
msgid "Ngandi"
|
msgid "Ngandi"
|
||||||
@ -19024,7 +19024,7 @@ msgstr "Kalapuya do norte"
|
|||||||
|
|
||||||
#. name for nru
|
#. name for nru
|
||||||
msgid "Narua"
|
msgid "Narua"
|
||||||
msgstr ""
|
msgstr "Narúa"
|
||||||
|
|
||||||
#. name for nrx
|
#. name for nrx
|
||||||
msgid "Ngurmbur"
|
msgid "Ngurmbur"
|
||||||
@ -19216,7 +19216,7 @@ msgstr "Nyole"
|
|||||||
|
|
||||||
#. name for nuk
|
#. name for nuk
|
||||||
msgid "Nuu-chah-nulth"
|
msgid "Nuu-chah-nulth"
|
||||||
msgstr ""
|
msgstr "Nuu-chah-nulth"
|
||||||
|
|
||||||
#. name for nul
|
#. name for nul
|
||||||
msgid "Nusa Laut"
|
msgid "Nusa Laut"
|
||||||
@ -19228,7 +19228,7 @@ msgstr "Niuafo'ou"
|
|||||||
|
|
||||||
#. name for nun
|
#. name for nun
|
||||||
msgid "Anong"
|
msgid "Anong"
|
||||||
msgstr ""
|
msgstr "Anong"
|
||||||
|
|
||||||
#. name for nuo
|
#. name for nuo
|
||||||
msgid "Nguôn"
|
msgid "Nguôn"
|
||||||
@ -20124,7 +20124,7 @@ msgstr "Glio-Oubi"
|
|||||||
|
|
||||||
#. name for oue
|
#. name for oue
|
||||||
msgid "Oune"
|
msgid "Oune"
|
||||||
msgstr ""
|
msgstr "Oune"
|
||||||
|
|
||||||
#. name for oui
|
#. name for oui
|
||||||
msgid "Uighur; Old"
|
msgid "Uighur; Old"
|
||||||
@ -20540,7 +20540,7 @@ msgstr "Rerep"
|
|||||||
|
|
||||||
#. name for pgl
|
#. name for pgl
|
||||||
msgid "Irish; Primitive"
|
msgid "Irish; Primitive"
|
||||||
msgstr ""
|
msgstr "Irlandés; Primitivo"
|
||||||
|
|
||||||
#. name for pgn
|
#. name for pgn
|
||||||
msgid "Paelignian"
|
msgid "Paelignian"
|
||||||
@ -25256,7 +25256,7 @@ msgstr "Tukumanféd"
|
|||||||
|
|
||||||
#. name for tkg
|
#. name for tkg
|
||||||
msgid "Malagasy; Tesaka"
|
msgid "Malagasy; Tesaka"
|
||||||
msgstr ""
|
msgstr "Malaio; Tesaka"
|
||||||
|
|
||||||
#. name for tkl
|
#. name for tkl
|
||||||
msgid "Tokelau"
|
msgid "Tokelau"
|
||||||
@ -26000,7 +26000,7 @@ msgstr "Lingua de signos taiwanés"
|
|||||||
|
|
||||||
#. name for tst
|
#. name for tst
|
||||||
msgid "Songway Kiini; Tondi"
|
msgid "Songway Kiini; Tondi"
|
||||||
msgstr ""
|
msgstr "Songway Kiini; Tondi"
|
||||||
|
|
||||||
#. name for tsu
|
#. name for tsu
|
||||||
msgid "Tsou"
|
msgid "Tsou"
|
||||||
@ -27576,7 +27576,7 @@ msgstr "Weh"
|
|||||||
|
|
||||||
#. name for wei
|
#. name for wei
|
||||||
msgid "Kiunum"
|
msgid "Kiunum"
|
||||||
msgstr ""
|
msgstr "Kiunum"
|
||||||
|
|
||||||
#. name for wem
|
#. name for wem
|
||||||
msgid "Gbe; Weme"
|
msgid "Gbe; Weme"
|
||||||
@ -28100,7 +28100,7 @@ msgstr "Wotapuri-Katarqalai"
|
|||||||
|
|
||||||
#. name for wtf
|
#. name for wtf
|
||||||
msgid "Watiwa"
|
msgid "Watiwa"
|
||||||
msgstr ""
|
msgstr "Watiwa"
|
||||||
|
|
||||||
#. name for wti
|
#. name for wti
|
||||||
msgid "Berta"
|
msgid "Berta"
|
||||||
@ -28700,7 +28700,7 @@ msgstr "Makhuwa-Marrevone"
|
|||||||
|
|
||||||
#. name for xmd
|
#. name for xmd
|
||||||
msgid "Mbudum"
|
msgid "Mbudum"
|
||||||
msgstr ""
|
msgstr "Mbudum"
|
||||||
|
|
||||||
#. name for xme
|
#. name for xme
|
||||||
msgid "Median"
|
msgid "Median"
|
||||||
@ -28768,7 +28768,7 @@ msgstr "Kamu"
|
|||||||
|
|
||||||
#. name for xmv
|
#. name for xmv
|
||||||
msgid "Malagasy; Tankarana"
|
msgid "Malagasy; Tankarana"
|
||||||
msgstr ""
|
msgstr "Malaio; Tankarana"
|
||||||
|
|
||||||
#. name for xmw
|
#. name for xmw
|
||||||
msgid "Malagasy; Tsimihety"
|
msgid "Malagasy; Tsimihety"
|
||||||
@ -29852,7 +29852,7 @@ msgstr "Yombe"
|
|||||||
|
|
||||||
#. name for yon
|
#. name for yon
|
||||||
msgid "Yongkom"
|
msgid "Yongkom"
|
||||||
msgstr ""
|
msgstr "Yongkom"
|
||||||
|
|
||||||
#. name for yor
|
#. name for yor
|
||||||
msgid "Yoruba"
|
msgid "Yoruba"
|
||||||
@ -30348,7 +30348,7 @@ msgstr "Zimakani"
|
|||||||
|
|
||||||
#. name for zil
|
#. name for zil
|
||||||
msgid "Zialo"
|
msgid "Zialo"
|
||||||
msgstr ""
|
msgstr "Zialo"
|
||||||
|
|
||||||
#. name for zim
|
#. name for zim
|
||||||
msgid "Mesme"
|
msgid "Mesme"
|
||||||
|
@ -5,7 +5,6 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import sys, os, re, time, random, __builtin__, warnings
|
import sys, os, re, time, random, __builtin__, warnings
|
||||||
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
|
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
|
||||||
from htmlentitydefs import name2codepoint
|
|
||||||
from math import floor
|
from math import floor
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
@ -381,12 +380,15 @@ def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
|
|||||||
user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
|
user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
|
||||||
opener.addheaders = [('User-agent', user_agent)]
|
opener.addheaders = [('User-agent', user_agent)]
|
||||||
proxies = get_proxies()
|
proxies = get_proxies()
|
||||||
|
to_add = {}
|
||||||
http_proxy = proxies.get('http', None)
|
http_proxy = proxies.get('http', None)
|
||||||
if http_proxy:
|
if http_proxy:
|
||||||
opener.set_proxies({'http':http_proxy})
|
to_add['http'] = http_proxy
|
||||||
https_proxy = proxies.get('https', None)
|
https_proxy = proxies.get('https', None)
|
||||||
if https_proxy:
|
if https_proxy:
|
||||||
opener.set_proxies({'https':https_proxy})
|
to_add['https'] = https_proxy
|
||||||
|
if to_add:
|
||||||
|
opener.set_proxies(to_add)
|
||||||
|
|
||||||
return opener
|
return opener
|
||||||
|
|
||||||
@ -548,6 +550,12 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252',
|
|||||||
return check(chr(num).decode(encoding))
|
return check(chr(num).decode(encoding))
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
return check(my_unichr(num))
|
return check(my_unichr(num))
|
||||||
|
from calibre.utils.html5_entities import entity_map
|
||||||
|
try:
|
||||||
|
return check(entity_map[ent])
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
from htmlentitydefs import name2codepoint
|
||||||
try:
|
try:
|
||||||
return check(my_unichr(name2codepoint[ent]))
|
return check(my_unichr(name2codepoint[ent]))
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|