Merge from trunk

This commit is contained in:
Charles Haley 2011-02-25 15:11:01 +00:00
commit cf684a052b
64 changed files with 22478 additions and 20784 deletions

View File

@ -1,10 +1,15 @@
__license__ = 'GPL v3'
__copyright__ = '2011'
'''
lemonde.fr
'''
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class LeMonde(BasicNewsRecipe): class LeMonde(BasicNewsRecipe):
title = 'Le Monde' title = 'Le Monde'
__author__ = 'veezh' __author__ = 'veezh'
description = u'Actualit\xe9s' description = 'Actualités'
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
@ -12,13 +17,27 @@ class LeMonde(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
publisher = 'lemonde.fr' publisher = 'lemonde.fr'
category = 'news, France, world'
language = 'fr' language = 'fr'
#publication_type = 'newsportal'
extra_css = '''
h1{font-size:130%;}
.ariane{font-size:xx-small;}
.source{font-size:xx-small;}
#.href{font-size:xx-small;}
.LM_caption{color:#666666; font-size:x-small;}
#.main-article-info{font-family:Arial,Helvetica,sans-serif;}
#full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
'''
#preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
conversion_options = { conversion_options = {
'comments' : description 'comments' : description
,'language' : language ,'tags' : category
,'publisher' : publisher ,'language' : language
,'linearize_tables': True ,'publisher' : publisher
} ,'linearize_tables': True
}
remove_empty_feeds = True remove_empty_feeds = True
@ -32,15 +51,28 @@ class LeMonde(BasicNewsRecipe):
return soup return soup
preprocess_regexps = [ preprocess_regexps = [
(re.compile(r'([0-9])%'), lambda m: m.group(1) + '&nbsp;%'),
(re.compile(r'([0-9])([0-9])([0-9]) ([0-9])([0-9])([0-9])'), lambda m: m.group(1) + m.group(2) + m.group(3) + '&nbsp;' + m.group(4) + m.group(5) + m.group(6)),
(re.compile(r'([0-9]) ([0-9])([0-9])([0-9])'), lambda m: m.group(1) + '&nbsp;' + m.group(2) + m.group(3) + m.group(4)),
(re.compile(r'<span>'), lambda match: ' <span>'),
(re.compile(r'\("'), lambda match: '(&laquo;&nbsp;'),
(re.compile(r'"\)'), lambda match: '&nbsp;&raquo;)'),
(re.compile(r'&ldquo;'), lambda match: '(&laquo;&nbsp;'),
(re.compile(r'&rdquo;'), lambda match: '&nbsp;&raquo;)'),
(re.compile(r'>\''), lambda match: '>&lsquo;'),
(re.compile(r' \''), lambda match: ' &lsquo;'), (re.compile(r' \''), lambda match: ' &lsquo;'),
(re.compile(r'\''), lambda match: '&rsquo;'), (re.compile(r'\''), lambda match: '&rsquo;'),
(re.compile(r'"<'), lambda match: '&nbsp;&raquo;<'), (re.compile(r'"<em>'), lambda match: '<em>&laquo;&nbsp;'),
(re.compile(r'"<em>"</em><em>'), lambda match: '<em>&laquo;&nbsp;'),
(re.compile(r'"<a href='), lambda match: '&laquo;&nbsp;<a href='),
(re.compile(r'</em>"'), lambda match: '&nbsp;&raquo;</em>'),
(re.compile(r'</a>"'), lambda match: '&nbsp;&raquo;</a>'),
(re.compile(r'"</'), lambda match: '&nbsp;&raquo;</'),
(re.compile(r'>"'), lambda match: '>&laquo;&nbsp;'), (re.compile(r'>"'), lambda match: '>&laquo;&nbsp;'),
(re.compile(r'"<'), lambda match: '&nbsp;&raquo;<'),
(re.compile(r'&rsquo;"'), lambda match: '&rsquo;«&nbsp;'), (re.compile(r'&rsquo;"'), lambda match: '&rsquo;«&nbsp;'),
(re.compile(r' "'), lambda match: ' &laquo;&nbsp;'), (re.compile(r' "'), lambda match: ' &laquo;&nbsp;'),
(re.compile(r'" '), lambda match: '&nbsp;&raquo; '), (re.compile(r'" '), lambda match: '&nbsp;&raquo; '),
(re.compile(r'\("'), lambda match: '(&laquo;&nbsp;'),
(re.compile(r'"\)'), lambda match: '&nbsp;&raquo;)'),
(re.compile(r'"\.'), lambda match: '&nbsp;&raquo;.'), (re.compile(r'"\.'), lambda match: '&nbsp;&raquo;.'),
(re.compile(r'",'), lambda match: '&nbsp;&raquo;,'), (re.compile(r'",'), lambda match: '&nbsp;&raquo;,'),
(re.compile(r'"\?'), lambda match: '&nbsp;&raquo;?'), (re.compile(r'"\?'), lambda match: '&nbsp;&raquo;?'),
@ -56,8 +88,14 @@ class LeMonde(BasicNewsRecipe):
(re.compile(r' %'), lambda match: '&nbsp;%'), (re.compile(r' %'), lambda match: '&nbsp;%'),
(re.compile(r'\.jpg&nbsp;&raquo; border='), lambda match: '.jpg'), (re.compile(r'\.jpg&nbsp;&raquo; border='), lambda match: '.jpg'),
(re.compile(r'\.png&nbsp;&raquo; border='), lambda match: '.png'), (re.compile(r'\.png&nbsp;&raquo; border='), lambda match: '.png'),
(re.compile(r' &ndash; '), lambda match: '&nbsp;&ndash; '),
(re.compile(r' '), lambda match: '&nbsp;&ndash; '),
(re.compile(r' - '), lambda match: '&nbsp;&ndash; '),
(re.compile(r' -,'), lambda match: '&nbsp;&ndash;,'),
(re.compile(r'&raquo;:'), lambda match: '&raquo;&nbsp;:'),
] ]
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'class':['contenu']}) dict(name='div', attrs={'class':['contenu']})
] ]
@ -65,11 +103,15 @@ class LeMonde(BasicNewsRecipe):
remove_tags_after = [dict(id='appel_temoignage')] remove_tags_after = [dict(id='appel_temoignage')]
def get_article_url(self, article): def get_article_url(self, article):
link = article.get('link') url = article.get('guid', None)
if 'blog' not in link: if '/chat/' in url or '.blog' in url or '/video/' in url or '/sport/' in url or '/portfolio/' in url or '/visuel/' in url :
return link url = None
return url
# def get_article_url(self, article):
# link = article.get('link')
# if 'blog' not in link and ('chat' not in link):
# return link
feeds = [ feeds = [
('A la une', 'http://www.lemonde.fr/rss/une.xml'), ('A la une', 'http://www.lemonde.fr/rss/une.xml'),
@ -94,3 +136,4 @@ class LeMonde(BasicNewsRecipe):
cover_url = link_item.img['src'] cover_url = link_item.img['src']
return cover_url return cover_url

View File

@ -198,7 +198,7 @@ def cli_docs(app):
documented_cmds = [] documented_cmds = []
undocumented_cmds = [] undocumented_cmds = []
for script in entry_points['console_scripts']: for script in entry_points['console_scripts'] + entry_points['gui_scripts']:
module = script[script.index('=')+1:script.index(':')].strip() module = script[script.index('=')+1:script.index(':')].strip()
cmd = script[:script.index('=')].strip() cmd = script[:script.index('=')].strip()
if cmd in ('calibre-complete', 'calibre-parallel'): continue if cmd in ('calibre-complete', 'calibre-parallel'): continue

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff