mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Arret sur images by Francois D
This commit is contained in:
parent
9be9b8fb36
commit
a671413989
54
recipes/arret_sur_images.recipe
Normal file
54
recipes/arret_sur_images.recipe
Normal file
@ -0,0 +1,54 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__license__ = 'WTFPL'
|
||||
__author__ = '2013, François D. <franek at chicour.net>'
|
||||
__description__ = 'Get some fresh news from Arrêt sur images'
|
||||
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Asi(BasicNewsRecipe):
    '''
    Recipe for the French news site "Arrêt sur images".

    Downloads three RSS feeds from www.arretsurimages.net, rewrites each
    article URL to its printable variant and, when a username and password
    are configured, submits them through the first form on the site's index
    page before fetching anything.
    '''

    title = 'Arrêt sur images'
    __author__ = 'François D. (aka franek)'
    description = 'Global news in french from news site "Arrêt sur images"'

    oldest_article = 7.0
    language = 'fr'
    needs_subscription = True
    max_articles_per_feed = 100

    simultaneous_downloads = 1
    # %H (24-hour clock) rather than %I, which is ambiguous without an AM/PM
    # marker. No literal UTC offset is appended, because a fixed value such
    # as "+0200" cannot be correct all year round.
    timefmt = '[%a, %d %b %Y %H:%M]'
    cover_url = 'http://www.arretsurimages.net/images/header/menu/menu_1.png'

    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        ('vite dit et gratuit', 'http://www.arretsurimages.net/vite-dit.rss'),
        ('Toutes les chroniques', 'http://www.arretsurimages.net/chroniques.rss'),
        ('Contenus et dossiers', 'http://www.arretsurimages.net/dossiers.rss'),
    ]

    conversion_options = {'smarten_punctuation': True}

    # Elements removed from every downloaded page, matched by id or by
    # tag name + class.
    remove_tags = [
        dict(id='vite-titre'),
        dict(id='header'),
        dict(id='wrap-connexion'),
        dict(id='col_right'),
        dict(name='div', attrs={'class': 'bloc-chroniqueur-2'}),
        dict(id='footercontainer'),
    ]

    def print_version(self, url):
        '''
        Return the printable-page URL for an article.

        :param url: Article URL taken from a feed.
        :return: ``url`` with ``contenu.php`` replaced by
                 ``contenu-imprimable.php``; unchanged if that substring
                 is absent.
        '''
        return url.replace('contenu.php', 'contenu-imprimable.php')

    def get_browser(self):
        '''
        Build the browser used for downloads, logging in when possible.

        The browser is created with ``use_robust_parser=True``. If both
        ``self.username`` and ``self.password`` are set, the first form of
        the index page is filled in and submitted.

        :return: The browser object.
        '''
        # Need to use robust HTML parser
        br = BasicNewsRecipe.get_browser(self, use_robust_parser=True)
        if self.username is not None and self.password is not None:
            br.open('http://www.arretsurimages.net/index.php')
            br.select_form(nr=0)
            # Lift the read-only flag on the form controls so that 'redir'
            # can be assigned below (presumably a hidden field; confirm
            # against the live login form).
            br.form.set_all_readonly(False)
            br['redir'] = 'forum/login.php'
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br
|
||||
|
@ -376,7 +376,7 @@ def random_user_agent(choose=None):
|
||||
choose = random.randint(0, len(choices)-1)
|
||||
return choices[choose]
|
||||
|
||||
def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
|
||||
def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, use_robust_parser=False):
|
||||
'''
|
||||
Create a mechanize browser for web scraping. The browser handles cookies,
|
||||
refresh requests and ignores robots.txt. Also uses proxy if available.
|
||||
@ -385,7 +385,11 @@ def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
|
||||
:param max_time: Maximum time in seconds to wait during a refresh request
|
||||
'''
|
||||
from calibre.utils.browser import Browser
|
||||
opener = Browser()
|
||||
if use_robust_parser:
|
||||
import mechanize
|
||||
opener = Browser(factory=mechanize.RobustFactory())
|
||||
else:
|
||||
opener = Browser()
|
||||
opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
|
||||
opener.set_handle_robots(False)
|
||||
if user_agent is None:
|
||||
|
@ -17,10 +17,10 @@ class Browser(B):
|
||||
cookie jar. All clones share the same browser configuration.
|
||||
'''
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._clone_actions = {}
|
||||
|
||||
B.__init__(self)
|
||||
B.__init__(self, *args, **kwargs)
|
||||
self.set_cookiejar(CookieJar())
|
||||
|
||||
def set_handle_refresh(self, *args, **kwargs):
|
||||
|
Loading…
x
Reference in New Issue
Block a user