Merge from trunk

This commit is contained in:
Charles Haley 2011-02-25 15:11:01 +00:00
commit cf684a052b
64 changed files with 22478 additions and 20784 deletions

View File

@ -1,10 +1,15 @@
__license__ = 'GPL v3'
__copyright__ = '2011'
'''
lemonde.fr
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class LeMonde(BasicNewsRecipe):
title = 'Le Monde'
__author__ = 'veezh'
description = u'Actualit\xe9s'
description = 'Actualités'
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
@ -12,13 +17,27 @@ class LeMonde(BasicNewsRecipe):
use_embedded_content = False
encoding = 'cp1252'
publisher = 'lemonde.fr'
category = 'news, France, world'
language = 'fr'
#publication_type = 'newsportal'
extra_css = '''
h1{font-size:130%;}
.ariane{font-size:xx-small;}
.source{font-size:xx-small;}
#.href{font-size:xx-small;}
.LM_caption{color:#666666; font-size:x-small;}
#.main-article-info{font-family:Arial,Helvetica,sans-serif;}
#full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
'''
#preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
conversion_options = {
'comments' : description
,'language' : language
,'publisher' : publisher
,'linearize_tables': True
}
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': True
}
remove_empty_feeds = True
@ -32,15 +51,28 @@ class LeMonde(BasicNewsRecipe):
return soup
preprocess_regexps = [
(re.compile(r'([0-9])%'), lambda m: m.group(1) + '&nbsp;%'),
(re.compile(r'([0-9])([0-9])([0-9]) ([0-9])([0-9])([0-9])'), lambda m: m.group(1) + m.group(2) + m.group(3) + '&nbsp;' + m.group(4) + m.group(5) + m.group(6)),
(re.compile(r'([0-9]) ([0-9])([0-9])([0-9])'), lambda m: m.group(1) + '&nbsp;' + m.group(2) + m.group(3) + m.group(4)),
(re.compile(r'<span>'), lambda match: ' <span>'),
(re.compile(r'\("'), lambda match: '(&laquo;&nbsp;'),
(re.compile(r'"\)'), lambda match: '&nbsp;&raquo;)'),
(re.compile(r'&ldquo;'), lambda match: '(&laquo;&nbsp;'),
(re.compile(r'&rdquo;'), lambda match: '&nbsp;&raquo;)'),
(re.compile(r'>\''), lambda match: '>&lsquo;'),
(re.compile(r' \''), lambda match: ' &lsquo;'),
(re.compile(r'\''), lambda match: '&rsquo;'),
(re.compile(r'"<'), lambda match: '&nbsp;&raquo;<'),
(re.compile(r'"<em>'), lambda match: '<em>&laquo;&nbsp;'),
(re.compile(r'"<em>"</em><em>'), lambda match: '<em>&laquo;&nbsp;'),
(re.compile(r'"<a href='), lambda match: '&laquo;&nbsp;<a href='),
(re.compile(r'</em>"'), lambda match: '&nbsp;&raquo;</em>'),
(re.compile(r'</a>"'), lambda match: '&nbsp;&raquo;</a>'),
(re.compile(r'"</'), lambda match: '&nbsp;&raquo;</'),
(re.compile(r'>"'), lambda match: '>&laquo;&nbsp;'),
(re.compile(r'"<'), lambda match: '&nbsp;&raquo;<'),
(re.compile(r'&rsquo;"'), lambda match: '&rsquo;«&nbsp;'),
(re.compile(r' "'), lambda match: ' &laquo;&nbsp;'),
(re.compile(r'" '), lambda match: '&nbsp;&raquo; '),
(re.compile(r'\("'), lambda match: '(&laquo;&nbsp;'),
(re.compile(r'"\)'), lambda match: '&nbsp;&raquo;)'),
(re.compile(r'"\.'), lambda match: '&nbsp;&raquo;.'),
(re.compile(r'",'), lambda match: '&nbsp;&raquo;,'),
(re.compile(r'"\?'), lambda match: '&nbsp;&raquo;?'),
@ -56,8 +88,14 @@ class LeMonde(BasicNewsRecipe):
(re.compile(r' %'), lambda match: '&nbsp;%'),
(re.compile(r'\.jpg&nbsp;&raquo; border='), lambda match: '.jpg'),
(re.compile(r'\.png&nbsp;&raquo; border='), lambda match: '.png'),
(re.compile(r' &ndash; '), lambda match: '&nbsp;&ndash; '),
(re.compile(r' '), lambda match: '&nbsp;&ndash; '),
(re.compile(r' - '), lambda match: '&nbsp;&ndash; '),
(re.compile(r' -,'), lambda match: '&nbsp;&ndash;,'),
(re.compile(r'&raquo;:'), lambda match: '&raquo;&nbsp;:'),
]
keep_only_tags = [
dict(name='div', attrs={'class':['contenu']})
]
@ -65,11 +103,15 @@ class LeMonde(BasicNewsRecipe):
remove_tags_after = [dict(id='appel_temoignage')]
def get_article_url(self, article):
link = article.get('link')
if 'blog' not in link:
return link
url = article.get('guid', None)
if '/chat/' in url or '.blog' in url or '/video/' in url or '/sport/' in url or '/portfolio/' in url or '/visuel/' in url :
url = None
return url
# def get_article_url(self, article):
# link = article.get('link')
# if 'blog' not in link and ('chat' not in link):
# return link
feeds = [
('A la une', 'http://www.lemonde.fr/rss/une.xml'),
@ -94,3 +136,4 @@ class LeMonde(BasicNewsRecipe):
cover_url = link_item.img['src']
return cover_url

View File

@ -198,7 +198,7 @@ def cli_docs(app):
documented_cmds = []
undocumented_cmds = []
for script in entry_points['console_scripts']:
for script in entry_points['console_scripts'] + entry_points['gui_scripts']:
module = script[script.index('=')+1:script.index(':')].strip()
cmd = script[:script.index('=')].strip()
if cmd in ('calibre-complete', 'calibre-parallel'): continue

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff