mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Arret sur images by Francois D
This commit is contained in:
parent
9be9b8fb36
commit
a671413989
54
recipes/arret_sur_images.recipe
Normal file
54
recipes/arret_sur_images.recipe
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'WTFPL'
|
||||||
|
__author__ = '2013, François D. <franek at chicour.net>'
|
||||||
|
__description__ = 'Get some fresh news from Arrêt sur images'
|
||||||
|
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Asi(BasicNewsRecipe):
    """Calibre news recipe for the French news site "Arrêt sur images".

    Articles are collected from three RSS feeds of www.arretsurimages.net
    and fetched through their printable variant. When a username and a
    password are configured, the recipe logs in through the form on the
    site's index page before downloading.
    """

    title = 'Arrêt sur images'
    __author__ = 'François D. (aka franek)'
    description = 'Global news in french from news site "Arrêt sur images"'

    oldest_article = 7.0
    language = 'fr'
    needs_subscription = True
    max_articles_per_feed = 100

    simultaneous_downloads = 1
    # %H (24-hour clock) instead of %I: with %I and no %p marker, morning
    # and afternoon times were rendered identically.
    # NOTE(review): the "+0200" offset is hard-coded text, not computed from
    # the clock; it presumably targets French summer time -- confirm whether
    # it should stay.
    timefmt = '[%a, %d %b %Y %H:%M +0200]'
    cover_url = 'http://www.arretsurimages.net/images/header/menu/menu_1.png'

    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # (feed title, feed URL) pairs.
    feeds = [
        ('vite dit et gratuit', 'http://www.arretsurimages.net/vite-dit.rss'),
        ('Toutes les chroniques', 'http://www.arretsurimages.net/chroniques.rss'),
        ('Contenus et dossiers', 'http://www.arretsurimages.net/dossiers.rss'),
    ]

    conversion_options = {'smarten_punctuation': True}

    # Page elements stripped from every downloaded article.
    remove_tags = [
        dict(id='vite-titre'),
        dict(id='header'),
        dict(id='wrap-connexion'),
        dict(id='col_right'),
        dict(name='div', attrs={'class': 'bloc-chroniqueur-2'}),
        dict(id='footercontainer'),
    ]

    def print_version(self, url):
        """Return the printable-page URL corresponding to an article URL.

        The URL is returned unchanged when it does not contain
        ``contenu.php``.
        """
        return url.replace('contenu.php', 'contenu-imprimable.php')

    def get_browser(self):
        """Return a browser, logged in to the site when credentials are set.

        The login step is skipped when either ``self.username`` or
        ``self.password`` is ``None``.
        """
        # Need to use robust HTML parser
        br = BasicNewsRecipe.get_browser(self, use_robust_parser=True)
        if self.username is None or self.password is None:
            return br

        br.open('http://www.arretsurimages.net/index.php')
        # The login form is taken to be the first form on the index page.
        br.select_form(nr=0)
        # Lift the read-only flag on all controls before filling them in;
        # presumably 'redir' is a read-only/hidden field -- TODO confirm.
        br.form.set_all_readonly(False)
        br['redir'] = 'forum/login.php'
        br['username'] = self.username
        br['password'] = self.password
        br.submit()
        return br
|
||||||
|
|
@ -376,7 +376,7 @@ def random_user_agent(choose=None):
|
|||||||
choose = random.randint(0, len(choices)-1)
|
choose = random.randint(0, len(choices)-1)
|
||||||
return choices[choose]
|
return choices[choose]
|
||||||
|
|
||||||
def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
|
def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, use_robust_parser=False):
|
||||||
'''
|
'''
|
||||||
Create a mechanize browser for web scraping. The browser handles cookies,
|
Create a mechanize browser for web scraping. The browser handles cookies,
|
||||||
refresh requests and ignores robots.txt. Also uses proxy if available.
|
refresh requests and ignores robots.txt. Also uses proxy if available.
|
||||||
@ -385,6 +385,10 @@ def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
|
|||||||
:param max_time: Maximum time in seconds to wait during a refresh request
|
:param max_time: Maximum time in seconds to wait during a refresh request
|
||||||
'''
|
'''
|
||||||
from calibre.utils.browser import Browser
|
from calibre.utils.browser import Browser
|
||||||
|
if use_robust_parser:
|
||||||
|
import mechanize
|
||||||
|
opener = Browser(factory=mechanize.RobustFactory())
|
||||||
|
else:
|
||||||
opener = Browser()
|
opener = Browser()
|
||||||
opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
|
opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
|
||||||
opener.set_handle_robots(False)
|
opener.set_handle_robots(False)
|
||||||
|
@ -17,10 +17,10 @@ class Browser(B):
|
|||||||
cookie jar. All clones share the same browser configuration.
|
cookie jar. All clones share the same browser configuration.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, *args, **kwargs):
|
||||||
self._clone_actions = {}
|
self._clone_actions = {}
|
||||||
|
|
||||||
B.__init__(self)
|
B.__init__(self, *args, **kwargs)
|
||||||
self.set_cookiejar(CookieJar())
|
self.set_cookiejar(CookieJar())
|
||||||
|
|
||||||
def set_handle_refresh(self, *args, **kwargs):
|
def set_handle_refresh(self, *args, **kwargs):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user