Initial implementation of web2lrf

parent: a601c9c11e
commit: 8f38a29165
@@ -13,7 +13,7 @@
 ## with this program; if not, write to the Free Software Foundation, Inc.,
 ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 ''' E-book management software'''
-__version__ = "0.3.69"
+__version__ = "0.3.70"
 __docformat__ = "epytext"
 __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
 __appname__ = 'libprs500'
@@ -74,7 +74,7 @@ def option_parser(usage):
     metadata = parser.add_option_group('METADATA OPTIONS')
     metadata.add_option('--header', action='store_true', default=False, dest='header',
                         help='Add a header to all the pages with title and author.')
-    metadata.add_option("-t", "--title", action="store", type="string", \
+    metadata.add_option("-t", "--title", action="store", type="string", default=None, \
                         dest="title", help="Set the title. Default: filename.")
    metadata.add_option("-a", "--author", action="store", type="string", \
                        dest="author", help="Set the author. Default: %default", default='Unknown')
src/libprs500/ebooks/lrf/web/__init__.py (new file, 14 lines)

## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
src/libprs500/ebooks/lrf/web/convert_from.py (new file, 119 lines)

## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''Convert known websites into LRF files.'''

import sys, time, tempfile, shutil, os
from urlparse import urlsplit

from libprs500 import __appname__
from libprs500.ebooks.lrf.html.convert_from import option_parser as html_option_parser
from libprs500.ebooks.lrf.html.convert_from import process_file
from libprs500.ebooks.lrf.web.profiles import profiles
from libprs500.web.fetch.simple import setup_logger as web2disk_setup_logger
from libprs500.web.fetch.simple import create_fetcher

available_profiles = profiles.keys()
available_profiles.remove('default')
available_profiles = ' '.join(available_profiles)

def option_parser():
    parser = html_option_parser(usage='''%prog [options] website_profile\n\n'''
                '''%prog downloads a site from the web and converts it '''
                '''into an LRF file for use with the SONY Reader. '''
                '''website_profile is one of '''+available_profiles+\
                '''. If you specify a website_profile of default or do not specify '''
                '''it, you must specify the --url option.'''
                )

    parser.remove_option('-t')

    parser.add_option('-u', '--url', dest='url', default=None,
                      help='The URL to download. You only need to specify this if you are not specifying a website_profile.')

    parser.add_option('-t', '--timeout', help='Timeout in seconds to wait for a response from the server. Default: %default s',
                      default=None, type='int', dest='timeout')
    parser.add_option('-r', '--max-recursions', help='Maximum number of levels to recurse, i.e. the depth of links to follow. Default: %default',
                      default=None, type='int', dest='max_recursions')
    parser.add_option('-n', '--max-files', default=None, type='int', dest='max_files',
                      help='The maximum number of files to download. This only applies to files from <a href> tags. Default is %default')
    parser.add_option('--delay', default=None, dest='delay', type='int',
                      help='Minimum interval in seconds between consecutive fetches. Default is %default s')
    parser.add_option('--dont-download-stylesheets', action='store_true', default=None,
                      help='Do not download CSS stylesheets.', dest='no_stylesheets')

    parser.add_option('--match-regexp', dest='match_regexps', default=[], action='append',
                      help='Only links that match this regular expression will be followed. This option can be specified multiple times, in which case as long as a link matches any one regexp, it will be followed. By default all links are followed.')
    parser.add_option('--filter-regexp', default=[], action='append', dest='filter_regexps',
                      help='Any link that matches this regular expression will be ignored. This option can be specified multiple times, in which case as long as any regexp matches a link, it will be ignored. By default, no links are ignored. If both --filter-regexp and --match-regexp are specified, then --filter-regexp is applied first.')
    return parser

def fetch_website(options):
    tdir = tempfile.mkdtemp(prefix=__appname__+'_')
    options.dir = tdir
    web2disk_setup_logger(options)
    fetcher = create_fetcher(options)
    fetcher.preprocess_regexps = options.preprocess_regexps
    return fetcher.start_fetch(options.url), tdir

def create_lrf(htmlfile, options):
    options.author = __appname__
    options.header = True
    if not options.output:
        options.output = os.path.abspath(os.path.expanduser(options.title + ('.lrs' if options.lrs else '.lrf')))
    process_file(htmlfile, options)

def main(args=sys.argv):
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) > 2:
        parser.print_help()
        return 1
    if len(args) == 2:
        if not profiles.has_key(args[1]):
            print >>sys.stderr, 'Unknown profile', args[1]
            print >>sys.stderr, 'Valid profiles:', profiles.keys()
            return 1
    profile = profiles[args[1]] if len(args) == 2 else profiles['default']

    for opt in ('url', 'timeout', 'max_recursions', 'max_files', 'delay', 'no_stylesheets'):
        val = getattr(options, opt)
        if val is None:
            setattr(options, opt, profile[opt])

    if not options.url:
        parser.print_help()
        print >>sys.stderr
        print >>sys.stderr, 'You must specify the --url option or a profile from one of:',
        print >>sys.stderr, available_profiles
        return 1

    if not options.title:
        title = profile['title']
        if not title:
            title = urlsplit(options.url).netloc
        options.title = title + time.strftime(' [%a %d %b %Y]', time.localtime())

    options.match_regexps += profile['match_regexps']
    options.preprocess_regexps = profile['preprocess_regexps']
    options.filter_regexps += profile['filter_regexps']

    htmlfile, tdir = fetch_website(options)
    create_lrf(htmlfile, options)
    shutil.rmtree(tdir)

    return 0

if __name__ == '__main__':
    sys.exit(main())
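Worth noting how main() resolves settings: every option in the loop above defaults to None on the command line, so a value the user supplies always wins and anything left unset falls back to the profile. A minimal, self-contained sketch of that merge (the Options class below is an illustrative stand-in for optparse's Values object, not part of the commit):

# Illustrative stand-in for the optparse Values object main() receives.
class Options(object):
    url = None      # left unset on the command line
    timeout = 30    # explicitly given on the command line

profile = {'url': 'http://bbcriver.com', 'timeout': 10}  # subset of the bbc profile

options = Options()
for opt in ('url', 'timeout'):
    if getattr(options, opt) is None:
        # unset options inherit the profile value
        setattr(options, opt, profile[opt])

print options.url      # http://bbcriver.com (from the profile)
print options.timeout  # 30 (the command line wins)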
src/libprs500/ebooks/lrf/web/profiles.py (new file, 99 lines)

## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''Profiles for known websites.'''

import time, re

profiles = {
    'default' : {
        'url'                : '',    # The URL of the website
        'title'              : '',    # The title to use for the LRF file
        'max_recursions'     : 1,     # Number of levels of links to follow
        'max_files'          : 1000,  # Maximum number of files to download
        'delay'              : 0,     # Delay between consecutive downloads
        'timeout'            : 10,    # Timeout for fetching files from the server
        'no_stylesheets'     : False, # Download stylesheets
        'match_regexps'      : [],    # List of regular expressions that determine which links to follow
        'filter_regexps'     : [],    # List of regular expressions that determine which links to ignore
                                      # Only one of match_regexps or filter_regexps should be defined
        'html2lrf_options'   : [],    # List of options to pass to html2lrf
        'preprocess_regexps' : [],    # List of regexp substitution rules to run on the downloaded HTML before running html2lrf
        # See the profiles below for examples of these settings.
    },

    'nytimes' : {
        'url' : 'http://nytimesriver.com',
        'title' : 'The New York Times',
        'match_regexps' : ['nytimes.com/'+time.strftime('%Y', time.localtime())],
        'preprocess_regexps' :
            [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
              [
                # Remove help link and replace it with the title
                (r'<a .*?alt=.Click here for information about this service.*?</a>',
                 lambda match: '<h1>The New York Times</h1>\n<p align="right"><b>%s</b></p>'%(time.strftime('%a %d %b %Y', time.localtime()),)),
                # Blank line before categories
                (r'<b>\s*NYT', lambda match: '<p></p><b>NYT'),
                # Blank line between articles
                (r'<p><a href', lambda match: '<br /><p><a href'),
                # Remove header from individual articles
                (r'<body class=.printerversion..*?<h1><nyt_headline',
                 lambda match: '<body class="printerversion">\n<h1><nyt_headline'),
                # Remove footer from individual articles
                (r'<nyt_update_bottom.*', lambda match: '</body></html>'),
                # Remove TimesSelect garbage
                (r'<title>.*?TimesSelect', lambda match: 'Downloading of TimesSelect stories is not supported.<!--'),
              ]
            ],
    },

    'bbc' : {
        'url' : 'http://bbcriver.com',
        'title' : 'The BBC',
        'no_stylesheets' : True,
        'preprocess_regexps' :
            [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
              [
                # Remove help link and replace it with the title
                (r'<a .*?alt=.Click here for information about this service.*?</a>',
                 lambda match: '<h1>The BBC</h1>\n<p align="right"><b>%s</b></p>'%(time.strftime('%a %d %b %Y', time.localtime()),)),
                # Blank line before categories
                (r'<b>\s*BBC', lambda match: '<p></p><b>BBC'),
                # Remove footer from individual stories
                (r'<div class=.footer.>.*?Published',
                 lambda match: '<p></p><div class="footer">Published'),
                # Add some style info in place of the disabled stylesheet
                (r'<link.*?type=.text/css.*?>',
                 '<style type="text/css">.headline {font-size: x-large;}</style>'),
              ]
            ],
    },
}

for key in profiles.keys():
    if key == 'default':
        continue
    newd = profiles['default'].copy()
    newd.update(profiles[key])
    profiles[key] = newd

def profile_to_command_line_options(profile):
    args = []
    args.append('--max-recursions='+str(profile['max_recursions']))
    args.append('--delay='+str(profile['delay']))
    for i in profile['match_regexps']:
        args.append('--match-regexp="'+i+'"')
    for i in profile['filter_regexps']:
        args.append('--filter-regexp="'+i+'"')
    return args
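Since every profile is merged over 'default' by the loop above, a new site profile only needs the keys that differ. A hypothetical entry, sketched under that assumption (the URL and regexp are illustrative, not a real profile):

import re

# The 'default' profile from above, inlined so this sketch runs standalone.
default = {
    'url' : '', 'title' : '', 'max_recursions' : 1, 'max_files' : 1000,
    'delay' : 0, 'timeout' : 10, 'no_stylesheets' : False,
    'match_regexps' : [], 'filter_regexps' : [],
    'html2lrf_options' : [], 'preprocess_regexps' : [],
}

example = {
    'url'   : 'http://example.com/news',   # illustrative URL
    'title' : 'Example News',
    'preprocess_regexps' : [
        # Drop everything after the footer before html2lrf sees the page
        (re.compile(r'<div class=.footer.>.*', re.IGNORECASE | re.DOTALL),
         lambda match: '</body></html>'),
    ],
}

merged = default.copy()
merged.update(example)     # the same merge the loop above performs
print merged['timeout']    # 10, inherited from the default profile
print merged['title']      # Example News, from the new entry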
@@ -15,7 +15,7 @@
 '''
 Fetch a webpage and its links recursively.
 '''
-import sys, socket, urllib2, os, urlparse, codecs, logging, re, time
+import sys, socket, urllib2, os, urlparse, codecs, logging, re, time, copy
 from urllib import url2pathname
 from httplib import responses
 from optparse import OptionParser
@@ -45,6 +45,11 @@ def save_soup(soup, target):
 class RecursiveFetcher(object):
     LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in
                 ('.exe\s*$', '.mp3\s*$', '.ogg\s*$', '^\s*mailto:', '^\s*$'))
+    #ADBLOCK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in
+    #                 (
+    #
+    #                 )
+    #                 )
     CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE)
 
     def __init__(self, options):
@@ -64,6 +69,14 @@ class RecursiveFetcher(object):
         self.stylemap = {}
         self.current_dir = self.base_dir
         self.files = 0
+        self.preprocess_regexps = []
+        self.download_stylesheets = not options.no_stylesheets
+
+
+    def get_soup(self, src):
+        nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
+        nmassage.extend(self.preprocess_regexps)
+        return BeautifulSoup(src, markupMassage=nmassage)
 
     def fetch_url(self, url):
         f = None
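The new get_soup() hook is how a profile's preprocess_regexps reach the raw HTML: BeautifulSoup 3.x runs its markupMassage pairs as re.sub() passes over the source before parsing, so extending MARKUP_MASSAGE makes the profile substitutions run first. A small standalone sketch of the same mechanism (the comment-stripping rule is illustrative, not from the commit):

import re, copy
from BeautifulSoup import BeautifulSoup

# Substitution pairs in the same shape as a profile's preprocess_regexps.
preprocess_regexps = [
    (re.compile(r'<!--.*?-->', re.DOTALL), lambda match: ''),  # illustrative rule
]

nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
nmassage.extend(preprocess_regexps)

src = '<p>kept</p><!-- stripped before the parser ever sees it -->'
soup = BeautifulSoup(src, markupMassage=nmassage)
print soup   # <p>kept</p>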
@@ -84,7 +97,7 @@
 
     def start_fetch(self, url):
         soup = BeautifulSoup('<a href="'+url+'" />')
-        print 'Working',
+        print 'Downloading',
         res = self.process_links(soup, url, 0, into_dir='')
         print '%s saved to %s'%(url, res)
         return res
@@ -99,9 +112,8 @@
         if self.filter_regexps:
             for f in self.filter_regexps:
                 if f.search(url):
                     return False
-            return True
-        if self.match_regexps:
+        elif self.match_regexps:
             for m in self.match_regexps:
                 if m.search(url):
                     return True
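This hunk changes link-filtering precedence: previously a URL that survived the filter regexps was accepted outright, so --match-regexp was ignored whenever --filter-regexp was given; now the filters are applied first and the match regexps are still consulted, matching the updated help text below. A sketch of the new precedence as a standalone function (the name and the fall-through return values are assumptions, since the tail of the real method lies outside this hunk):

import re

def link_ok(url, filter_regexps, match_regexps):
    # --filter-regexp is applied first: any hit rejects the link outright
    for f in filter_regexps:
        if f.search(url):
            return False
    # otherwise, if match regexps were given, at least one must hit
    if match_regexps:
        for m in match_regexps:
            if m.search(url):
                return True
        return False  # assumed fall-through, not visible in the hunk
    return True       # assumed default when neither kind fires

filters = [re.compile(r'\.mp3\s*$', re.IGNORECASE)]
matches = [re.compile(r'nytimes\.com/2007')]
print link_ok('http://nytimes.com/2007/a.html', filters, matches)  # True
print link_ok('http://nytimes.com/song.mp3', filters, matches)     # False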
@@ -243,10 +255,11 @@
                 try:
                     self.current_dir = linkdiskpath
                     f = self.fetch_url(iurl)
-                    soup = BeautifulSoup(f.read())
+                    soup = self.get_soup(f.read())
                     logger.info('Processing images...')
                     self.process_images(soup, f.geturl())
-                    self.process_stylesheets(soup, f.geturl())
+                    if self.download_stylesheets:
+                        self.process_stylesheets(soup, f.geturl())
+
                     res = os.path.join(linkdiskpath, basename(iurl))
                     self.filemap[nurl] = res
@@ -284,26 +297,36 @@ def option_parser(usage='%prog URL\n\nWhere URL is for example http://google.com
                       default=1, type='int', dest='max_recursions')
     parser.add_option('-n', '--max-files', default=sys.maxint, type='int', dest='max_files',
                       help='The maximum number of files to download. This only applies to files from <a href> tags. Default is %default')
+    parser.add_option('--delay', default=0, dest='delay', type='int',
+                      help='Minimum interval in seconds between consecutive fetches. Default is %default s')
     parser.add_option('--match-regexp', default=[], action='append', dest='match_regexps',
                       help='Only links that match this regular expression will be followed. This option can be specified multiple times, in which case as long as a link matches any one regexp, it will be followed. By default all links are followed.')
     parser.add_option('--filter-regexp', default=[], action='append', dest='filter_regexps',
-                      help='Any link that matches this regular expression will be ignored. This option can be specified multiple times, in which case as long as any regexp matches a link, it will be ignored. By default, no links are ignored. If both --filter-regexp and --match-regexp are specified, then --match-regexp is ignored.')
+                      help='Any link that matches this regular expression will be ignored. This option can be specified multiple times, in which case as long as any regexp matches a link, it will be ignored. By default, no links are ignored. If both --filter-regexp and --match-regexp are specified, then --filter-regexp is applied first.')
-    parser.add_option('--delay', default=0, dest='delay', type='int',
-                      help='Minimum interval in seconds between consecutive fetches. Default is %default s')
+    parser.add_option('--dont-download-stylesheets', action='store_true', default=False,
+                      help='Do not download CSS stylesheets.', dest='no_stylesheets')
     parser.add_option('--verbose', help='Show detailed output information. Useful for debugging',
                       default=False, action='store_true', dest='verbose')
     return parser
+
+
+def create_fetcher(options):
+    return RecursiveFetcher(options)
+
+def setup_logger(options):
+    level = logging.DEBUG if options.verbose else logging.WARNING
+    setup_cli_handlers(logger, level)
+
 def main(args=sys.argv):
     parser = option_parser()
     options, args = parser.parse_args(args)
     if len(args) != 2:
         parser.print_help()
         return 1
-    level = logging.DEBUG if options.verbose else logging.WARNING
-    setup_cli_handlers(logger, level)
-    fetcher = RecursiveFetcher(options)
+    setup_logger(options)
+    fetcher = create_fetcher(options)
     fetcher.start_fetch(args[1])
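The two new module-level helpers, create_fetcher() and setup_logger(), are the hooks web2lrf's fetch_website() imports from libprs500.web.fetch.simple. Something like the following drives the fetcher programmatically the same way (a sketch under those assumptions: the URL is illustrative, and options.dir is set manually just as fetch_website() does):

import tempfile
from libprs500.web.fetch.simple import option_parser, setup_logger, create_fetcher

# parse_args() receives the full argv-style list; args[1] is the URL
options, args = option_parser().parse_args(['web2disk', 'http://bbcriver.com'])
options.dir = tempfile.mkdtemp(prefix='web2lrf_')  # download target directory
setup_logger(options)

fetcher = create_fetcher(options)
fetcher.preprocess_regexps = []   # or a profile's preprocess_regexps
index = fetcher.start_fetch(args[1])
print 'Index file:', index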