mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-08 02:34:06 -04:00)

commit 00b0cf46fc (parent fb53f18a8f)

    feeds2lrf
@@ -18,7 +18,7 @@ __docformat__ = "epytext"
 __author__  = "Kovid Goyal <kovid@kovidgoyal.net>"
 __appname__ = 'libprs500'

-import sys, os, logging, mechanize, locale, cStringIO, re, subprocess, textwrap
+import sys, os, logging, mechanize, locale, copy, cStringIO, re, subprocess, textwrap
 from gettext import GNUTranslations
 from math import floor
 from optparse import OptionParser as _OptionParser
@@ -143,38 +143,64 @@ class OptionParser(_OptionParser):
             raise Exception(msg)
         _OptionParser.error(self, msg)

+    def merge(self, parser):
+        '''
+        Add options from parser to self. In case of conflicts, conflicting options from
+        parser are skipped.
+        '''
+        opts   = list(parser.option_list)
+        groups = list(parser.option_groups)
+
+        def merge_options(options, container):
+            for opt in copy.deepcopy(options):
+                if not self.has_option(opt.get_opt_string()):
+                    container.add_option(opt)
+
+        merge_options(opts, self)
+
+        for group in groups:
+            g = self.add_option_group(group.title)
+            merge_options(group.option_list, g)
+
     def subsume(self, group_name, msg=''):
         '''
         Move all existing options into a subgroup named
         C{group_name} with description C{msg}.
         '''
-        opts = list(self.option_list)
-        groups = list(self.option_groups)
-        exclude = []
-
-        for opt in opts:
-            ops = opt.get_opt_string()
-            if ops in ('--help', '--version'):
-                exclude.append(opt)
-            else:
-                self.remove_option(ops)
-        for group in groups:
-            for opt in group.option_list:
-                opts.append(opt)
-                group.remove_option(opt)
-
+        opts = [opt for opt in self.options_iter() if opt.get_opt_string() not in ('--version', '--help')]
         self.option_groups = []
         subgroup = self.add_option_group(group_name, msg)
         for opt in opts:
-            if opt in exclude:
-                continue
+            self.remove_option(opt.get_opt_string())
             subgroup.add_option(opt)

+    def options_iter(self):
+        for opt in self.option_list:
+            if str(opt).strip():
+                yield opt
+        for gr in self.option_groups:
+            for opt in gr.option_list:
+                if str(opt).strip():
+                    yield opt
+
+    def option_by_dest(self, dest):
+        for opt in self.options_iter():
+            if opt.dest == dest:
+                return opt
+
+    def merge_options(self, lower, upper):
+        '''
+        Merge options in lower and upper option lists into upper.
+        Default values in upper are overridden by
+        non default values in lower.
+        '''
+        for dest in lower.__dict__.keys():
+            if not upper.__dict__.has_key(dest):
+                continue
+            opt = self.option_by_dest(dest)
+            if lower.__dict__[dest] != opt.default and \
+                    upper.__dict__[dest] == opt.default:
+                upper.__dict__[dest] = lower.__dict__[dest]
+

 def load_library(name, cdll):
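The merge(), subsume(), options_iter(), option_by_dest() and merge_options() methods added above let one command line tool wrap two existing parsers: subsume() tucks a parser's own switches into a named group, merge() copies option definitions from another parser while skipping conflicts, and merge_options() later reconciles two parsed values objects so that non-default values win over defaults. A minimal sketch of the intended combination, mirroring the option_parser() added in convert_from.py further down (the group titles are just labels):

    from libprs500.web.feeds.main import option_parser as feeds_option_parser
    from libprs500.ebooks.lrf import option_parser as lrf_option_parser

    parser = feeds_option_parser()
    parser.subsume('FEEDS2DISK OPTIONS', 'Options to control the behavior of feeds2disk')

    lrf_parser = lrf_option_parser('')
    lrf_parser.subsume('HTML2LRF OPTIONS', 'Options to control the behavior of html2lrf')

    parser.merge(lrf_parser)                         # conflicting options from lrf_parser are skipped
    opts, args = parser.parse_args(['feeds2lrf'])    # a single parser now accepts both option sets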
src/libprs500/ebooks/lrf/feeds/__init__.py (new file, 16 lines)
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+## Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License along
+## with this program; if not, write to the Free Software Foundation, Inc.,
+## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
src/libprs500/ebooks/lrf/feeds/convert_from.py (new file, 71 lines)
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+## Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License along
+## with this program; if not, write to the Free Software Foundation, Inc.,
+## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+'''
+Convert web feeds to LRF files.
+'''
+from libprs500.ebooks.lrf import option_parser as lrf_option_parser
+from libprs500.ebooks.lrf.html.convert_from import process_file
+from libprs500.web.feeds.main import option_parser as feeds_option_parser
+from libprs500.web.feeds.main import run_recipe
+from libprs500.ptempfile import PersistentTemporaryDirectory
+from libprs500 import sanitize_file_name
+
+import sys, os, time
+
+def option_parser():
+    parser = feeds_option_parser()
+    parser.remove_option('--output-dir')
+    parser.remove_option('--lrf')
+    parser.subsume('FEEDS2DISK OPTIONS', _('Options to control the behavior of feeds2disk'))
+    lrf_parser = lrf_option_parser('')
+    lrf_parser.subsume('HTML2LRF OPTIONS', _('Options to control the behavior of html2lrf'))
+    parser.merge(lrf_parser)
+    return parser
+
+def main(args=sys.argv, notification=None, handler=None):
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    opts.lrf = True
+
+    if len(args) != 2 and opts.feeds is None:
+        parser.print_help()
+        return 1
+
+    recipe_arg = args[1] if len(args) > 1 else None
+
+    tdir = PersistentTemporaryDirectory('_feeds2lrf')
+    opts.output_dir = tdir
+
+    recipe = run_recipe(opts, recipe_arg, parser, notification=notification, handler=handler)
+
+    htmlfile = os.path.join(tdir, 'index.html')
+    if not os.access(htmlfile, os.R_OK):
+        raise RuntimeError(_('Fetching of recipe failed: ')+recipe_arg)
+
+    lparser = lrf_option_parser('')
+    ropts = lparser.parse_args(['html2lrf']+recipe.html2lrf_options)[0]
+    parser.merge_options(ropts, opts)
+
+    if not opts.output:
+        ext = '.lrs' if opts.lrs else '.lrf'
+        fname = recipe.title + time.strftime(recipe.timefmt)+ext
+        opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
+    print 'Generating LRF...'
+    process_file(htmlfile, opts)
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
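The new script is also registered for shell completion as feeds2lrf (see the setup_completion change below), so the normal entry point is the command line; the same main() can be driven programmatically, since it takes an argv-style list whose second element names the recipe. A minimal sketch, where the recipe title is only illustrative (any builtin recipe title or a path to a recipe file should work):

    import sys
    from libprs500.ebooks.lrf.feeds.convert_from import main

    # Equivalent to running "feeds2lrf Newsweek" from a shell: the feed is
    # downloaded into a temporary directory and then converted to an LRF
    # file in the current working directory.
    sys.exit(main(['feeds2lrf', 'Newsweek']))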
@@ -1715,7 +1715,7 @@ def process_file(path, options, logger=None):
             tpath = ''
     try_opf(path, options, logger)
-    if options.cover:
+    if getattr(options, 'cover', None):
         options.cover = os.path.expanduser(options.cover)
         if not os.path.isabs(options.cover):
             options.cover = os.path.join(dirpath, options.cover)
@@ -1750,7 +1750,7 @@ def process_file(path, options, logger=None):
         options.title = default_title

    for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
-        val = getattr(options, prop)
+        val = getattr(options, prop, None)
         if val and not isinstance(val, unicode):
             soup = BeautifulSoup(val)
             setattr(options, prop, unicode(soup))
@@ -1822,13 +1822,14 @@ def try_opf(path, options, logger):
             break
     if opf is None:
         return

     dirpath = os.path.dirname(os.path.abspath(opf))
     opf = OPFReader(open(opf, 'rb'), dirpath)
     try:
         title = opf.title
-        if title and not options.title:
+        if title and not getattr(options, 'title', None):
             options.title = title
-        if options.author == 'Unknown':
+        if getattr(options, 'author', 'Unknown') == 'Unknown':
             if opf.authors:
                 options.author = ', '.join(opf.authors)
             if opf.author_sort:
@@ -1837,12 +1838,12 @@ def try_opf(path, options, logger):
         publisher = opf.publisher
         if publisher:
             options.publisher = publisher
-        if not options.category:
+        if not getattr(options, 'category', None):
             category = opf.category
             if category:
                 options.category = category
-        if not options.cover or options.use_metadata_cover:
-            orig_cover = options.cover
+        if not getattr(options, 'cover', None) or options.use_metadata_cover:
+            orig_cover = getattr(options, 'cover', None)
             options.cover = None
             cover = opf.cover
             if cover:
@@ -1865,10 +1866,10 @@ def try_opf(path, options, logger):
                     break
                 except:
                     continue
-        if not options.cover and orig_cover is not None:
+        if not getattr(options, 'cover', None) and orig_cover is not None:
             options.cover = orig_cover
         options.spine = [i.href for i in opf.spine.items()]
-        if not hasattr(options, 'toc') or options.toc is None:
+        if not getattr(options, 'toc', None):
             options.toc = opf.toc
     except Exception:
         logger.exception('Failed to process opf file')

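The hunks above replace direct attribute access on the options object with getattr() and an explicit default, presumably because the options namespace handed to process_file()/try_opf() can now come from the merged feeds2lrf parser and need not define every html2lrf attribute. A minimal illustration of the difference, where the class and attribute name are only examples:

    class _Opts(object):
        pass

    opts = _Opts()

    # opts.cover would raise AttributeError here; the getattr form degrades
    # gracefully to None when the attribute was never defined.
    cover = getattr(opts, 'cover', None)
    if cover:
        pass  # only act on a cover that was actually supplied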
@@ -503,7 +503,7 @@ class OPFReader(OPF):
         stream.close()
         self.manifest = Manifest(self.soup, dir)
         self.spine = Spine(self.soup, self.manifest)
-        self.toc = TOC()
+        self.toc = TOC(base_path=dir)
         self.toc.read_from_opf(self)
         self.cover_data = (None, None)

@@ -554,12 +554,14 @@ class OPFCreator(MetaInformation):
         self.manifest = rentries

     def create_manifest_from_files_in(self, files_and_dirs):
+        #self.base_path = os.path.commonprefix(files_and_dirs)
         entries = []

         def dodir(dir):
             for root, dirs, files in os.walk(dir):
                 for name in files:
                     path = os.path.join(root, name)
+                    if os.path.isfile(path):
                         entries.append((path, None))

         for i in files_and_dirs:
@@ -166,6 +166,7 @@ def setup_completion(fatal_errors):
     from libprs500.ebooks.mobi.reader import option_parser as mobioeb
     from libprs500.web.feeds.main import option_parser as feeds2disk
     from libprs500.web.feeds.recipes import titles as feed_titles
+    from libprs500.ebooks.lrf.feeds.convert_from import option_parser as feeds2lrf

     f = open_file('/etc/bash_completion.d/libprs500')

@@ -191,6 +192,7 @@ def setup_completion(fatal_errors):
         f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf']))
         f.write(opts_and_exts('mobi2oeb', mobioeb, ['mobi', 'prc']))
         f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
+        f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
         f.write('''
_prs500_ls()
{
@@ -79,7 +79,7 @@ class Feed(object):
         self.oldest_article = oldest_article

         for item in entries:
-            if len(self.articles) > max_articles_per_feed:
+            if len(self.articles) >= max_articles_per_feed:
                 break
             self.parse_article(item)

@@ -41,7 +41,7 @@ Available builtin recipes are:
     p.remove_option('--base-dir')
     p.remove_option('--verbose')
     p.remove_option('--max-files')
-    p.subsume('WEB2DISK OPTIONS', 'Options to control web2disk (used to fetch websites linked from feeds)')
+    p.subsume('WEB2DISK OPTIONS', _('Options to control web2disk (used to fetch websites linked from feeds)'))

     p.add_option('--feeds', default=None,
                  help=_('''Specify a list of feeds to download. For example:
@@ -50,7 +50,7 @@ If you specify this option, any argument to %prog is ignored and a default recip
     p.add_option('--verbose', default=False, action='store_true',
                  help=_('''Be more verbose while processing.'''))
     p.add_option('--title', default=None,
-                 help='The title for this recipe. Used as the title for any ebooks created from the downloaded feeds.')
+                 help=_('The title for this recipe. Used as the title for any ebooks created from the downloaded feeds.'))
     p.add_option('--username', default=None, help=_('Username for sites that require a login to access content.'))
     p.add_option('--password', default=None, help=_('Password for sites that require a login to access content.'))
     p.add_option('--lrf', default=False, action='store_true', help='Optimize fetching for subsequent conversion to LRF.')
@@ -61,7 +61,9 @@ If you specify this option, any argument to %prog is ignored and a default recip
     p.add_option('--no-progress-bar', dest='progress_bar', default=True, action='store_false',
                  help=_('Dont show the progress bar'))
     p.add_option('--debug', action='store_true', default=False,
-                 help='Very verbose output, useful for debugging.')
+                 help=_('Very verbose output, useful for debugging.'))
+    p.add_option('--test', action='store_true', default=False,
+                 help=_('Useful for recipe development. Forces max_articles_per_feed to 2 and downloads at most 2 feeds.'))

     return p

@@ -72,10 +74,10 @@ def simple_progress_bar(percent, msg):
 def no_progress_bar(percent, msg):
     print msg

-def main(args=sys.argv, notification=None, handler=None):
-    p = option_parser()
-    opts, args = p.parse_args(args)
+class RecipeError(Exception):
+    pass
+
+def run_recipe(opts, recipe_arg, parser, notification=None, handler=None):
     if notification is None:
         from libprs500.terminfo import TerminalController, ProgressBar
         term = TerminalController(sys.stdout)
@@ -89,18 +91,15 @@ def main(args=sys.argv, notification=None, handler=None):
         else:
             notification = no_progress_bar

-    if len(args) != 2 and opts.feeds is None:
-        p.print_help()
-        return 1
-
     recipe = None
     if opts.feeds is not None:
         recipe = BasicNewsRecipe
     else:
         try:
-            if os.access(args[1], os.R_OK):
+            if os.access(recipe_arg, os.R_OK):
                 try:
-                    recipe = compile_recipe(open(args[1]).read())
+                    recipe = compile_recipe(open(recipe_arg).read())
                 except:
                     import traceback
                     traceback.print_exc()
@@ -108,15 +107,13 @@ def main(args=sys.argv, notification=None, handler=None):
             else:
                 raise Exception('not file')
         except:
-            recipe = get_builtin_recipe(args[1])
+            recipe = get_builtin_recipe(recipe_arg)
             if recipe is None:
-                recipe = compile_recipe(args[1])
+                recipe = compile_recipe(recipe_arg)

     if recipe is None:
-        p.print_help()
-        print
-        print args[1], 'is an invalid recipe'
-        return 1
+        raise RecipeError(recipe_arg+ ' is an invalid recipe')

     if handler is None:
         from libprs500 import ColoredFormatter
@@ -125,9 +122,23 @@ def main(args=sys.argv, notification=None, handler=None):
         handler.setFormatter(ColoredFormatter('%(levelname)s: %(message)s\n')) # The trailing newline is need because of the progress bar
         logging.getLogger('feeds2disk').addHandler(handler)

-    recipe = recipe(opts, p, notification)
+    recipe = recipe(opts, parser, notification)
+    if not os.path.exists(recipe.output_dir):
+        os.makedirs(recipe.output_dir)
     recipe.download()

+    return recipe
+
+def main(args=sys.argv, notification=None, handler=None):
+    p = option_parser()
+    opts, args = p.parse_args(args)
+
+    if len(args) != 2 and opts.feeds is None:
+        p.print_help()
+        return 1
+    recipe_arg = args[1] if len(args) > 1 else None
+    run_recipe(opts, recipe_arg, p, notification=notification, handler=handler)
+
     return 0

 if __name__ == '__main__':
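The refactoring above splits the old main() into run_recipe(), which takes an already parsed options object, the recipe argument and the parser, creates the output directory, downloads the recipe, and returns the recipe instance (raising RecipeError instead of printing help on a bad recipe). That split is what lets convert_from.py reuse the download step. A minimal sketch of calling it directly, with an illustrative output directory and recipe title:

    from libprs500.web.feeds.main import option_parser, run_recipe

    parser = option_parser()
    opts, args = parser.parse_args(['feeds2disk'])   # defaults only
    opts.output_dir = '/tmp/newsweek'                # hypothetical target directory

    recipe = run_recipe(opts, 'Newsweek', parser)
    # recipe.html2lrf_options is then available to a converter front end.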
@@ -165,6 +165,8 @@ class BasicNewsRecipe(object):
         '''
         if not self.feeds:
             raise NotImplementedError
+        if self.test:
+            return self.feeds[:2]
         return self.feeds

     @classmethod
@@ -225,9 +227,12 @@ class BasicNewsRecipe(object):
         @param parser: Command line option parser. Used to intelligently merge options.
         @param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
         '''
-        for attr in ('username', 'password', 'lrf', 'output_dir', 'verbose', 'debug'):
+        for attr in ('username', 'password', 'lrf', 'output_dir', 'verbose', 'debug', 'test'):
             setattr(self, attr, getattr(options, attr))
         self.output_dir = os.path.abspath(self.output_dir)
+        if options.test:
+            self.max_articles_per_feed = 2
+            self.simultaneous_downloads = min(4, self.simultaneous_downloads)

         self.logger = logging.getLogger('feeds2disk')
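Together with the new --test switch in feeds2disk, these changes give recipes a quick development mode: the option is copied onto the recipe instance, max_articles_per_feed is forced to 2, simultaneous downloads are capped at 4, and (further below) build_index() only looks at the first two feeds. A minimal sketch of how the flag reaches a recipe, mirroring the feeds2disk wiring; nothing is downloaded here:

    from libprs500.web.feeds.main import option_parser

    parser = option_parser()
    opts, args = parser.parse_args(['feeds2disk', '--test'])

    # opts.test is now True; when a BasicNewsRecipe subclass is constructed
    # with these options it limits itself to 2 articles per feed and at most
    # 2 feeds, which keeps trial runs fast while developing a recipe.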
@@ -288,11 +293,13 @@ class BasicNewsRecipe(object):
             self.simultaneous_downloads = 1

         self.navbar = templates.NavBarTemplate()
-        self.max_articles_per_feed -= 1
         self.html2lrf_options.append('--use-spine')
         self.failed_downloads = []
         self.partial_failures = []


     def _postprocess_html(self, soup):
         if self.extra_css is not None:
             head = soup.find('head')
@@ -383,6 +390,8 @@ class BasicNewsRecipe(object):
     def build_index(self):
         self.report_progress(0, _('Fetching feeds...'))
         feeds = self.parse_feeds()
+        if self.test:
+            feeds = feeds[:2]
         self.has_single_feed = len(feeds) == 1

         index = os.path.join(self.output_dir, 'index.html')
@@ -460,13 +469,14 @@ class BasicNewsRecipe(object):
         if dir is None:
             dir = self.output_dir
         mi = MetaInformation(self.title + time.strftime(self.timefmt), [__appname__])
+        mi.author_sort = __appname__
         opf_path = os.path.join(dir, 'index.opf')
         ncx_path = os.path.join(dir, 'index.ncx')
         opf = OPFCreator(dir, mi)

-        manifest = ['feed_%d'%i for i in range(len(feeds))]
-        manifest.append('index.html')
+        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+        manifest.append(os.path.join(dir, 'index.html'))
         cpath = getattr(self, 'cover_path', None)
         if cpath is not None and os.access(cpath, os.R_OK):
             opf.cover = cpath
@@ -41,7 +41,7 @@ class Newsweek(BasicNewsRecipe):
              'http://feeds.newsweek.com/newsweek/columnists/AnnaQuindlen',
              ]

-    extra_css = '#content { font:serif 1.2em; }'
+    extra_css = '#content { font:serif 12pt; }\n.story {font:12pt}\n.HorizontalHeader {font:18pt}\n.deck {font:16pt}'
     keep_only_tags = [dict(name='div', id='content')]

     remove_tags = [
@@ -54,11 +54,6 @@ class Newsweek(BasicNewsRecipe):
     recursions = 1
     match_regexps = [r'http://www.newsweek.com/id/\S+/page/\d+']

-    # For testing
-    #feeds = feeds[3:5]
-    #max_articles_per_feed = 2
-
-
     def postprocess_html(self, soup):
         divs = list(soup.findAll('div', 'pagination'))
@@ -37,9 +37,11 @@ def basename(url):
     return res

 def save_soup(soup, target):
-    nm = Tag(soup, '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />')
-    meta = soup.find('meta', content=True)
-    if meta and 'charset' in meta['content']:
+    ns = BeautifulSoup('<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />')
+    nm = ns.find('meta')
+    metas = soup.findAll('meta', content=True)
+    for meta in metas:
+        if 'charset' in meta['content']:
             meta.replaceWith(nm)
     f = codecs.open(target, 'w', 'utf-8')
     f.write(unicode(soup))