mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'ruff-pep8-strict' of https://github.com/un-pogaz/calibre
This commit is contained in:
commit
7e61ea2248
@ -1,5 +1,4 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@ -27,7 +26,7 @@ for name, src in sources.items():
|
|||||||
os.chdir(iconset)
|
os.chdir(iconset)
|
||||||
try:
|
try:
|
||||||
for sz in (16, 32, 128, 256, 512, 1024):
|
for sz in (16, 32, 128, 256, 512, 1024):
|
||||||
iname = 'icon_{0}x{0}.png'.format(sz)
|
iname = f'icon_{sz}x{sz}.png'
|
||||||
iname2x = 'icon_{0}x{0}@2x.png'.format(sz // 2)
|
iname2x = 'icon_{0}x{0}@2x.png'.format(sz // 2)
|
||||||
if src.endswith('.svg'):
|
if src.endswith('.svg'):
|
||||||
subprocess.check_call(['rsvg-convert', src, '-w', str(sz), '-h', str(sz), '-o', iname])
|
subprocess.check_call(['rsvg-convert', src, '-w', str(sz), '-h', str(sz), '-o', iname])
|
||||||
@ -36,7 +35,7 @@ for name, src in sources.items():
|
|||||||
if sz == 512:
|
if sz == 512:
|
||||||
shutil.copy2(src, iname)
|
shutil.copy2(src, iname)
|
||||||
else:
|
else:
|
||||||
subprocess.check_call(['convert', src, '-resize', '{0}x{0}'.format(sz), iname])
|
subprocess.check_call(['convert', src, '-resize', f'{sz}x{sz}', iname])
|
||||||
if sz > 16:
|
if sz > 16:
|
||||||
shutil.copy2(iname, iname2x)
|
shutil.copy2(iname, iname2x)
|
||||||
if sz > 512:
|
if sz > 512:
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@ -24,7 +23,7 @@ for name, src in sources.items():
|
|||||||
try:
|
try:
|
||||||
names = []
|
names = []
|
||||||
for sz in (16, 24, 32, 48, 64, 256):
|
for sz in (16, 24, 32, 48, 64, 256):
|
||||||
iname = os.path.join('ico_temp', '{0}x{0}.png'.format(sz))
|
iname = os.path.join('ico_temp', f'{sz}x{sz}.png')
|
||||||
subprocess.check_call(['rsvg-convert', src, '-w', str(sz), '-h', str(sz), '-o', iname])
|
subprocess.check_call(['rsvg-convert', src, '-w', str(sz), '-h', str(sz), '-o', iname])
|
||||||
subprocess.check_call(['optipng', '-o7', '-strip', 'all', iname])
|
subprocess.check_call(['optipng', '-o7', '-strip', 'all', iname])
|
||||||
if sz >= 128:
|
if sz >= 128:
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
|
||||||
@ -27,7 +26,7 @@ def clone_node(node, parent):
|
|||||||
def merge():
|
def merge():
|
||||||
base = os.path.dirname(os.path.abspath(__file__))
|
base = os.path.dirname(os.path.abspath(__file__))
|
||||||
ans = etree.fromstring(
|
ans = etree.fromstring(
|
||||||
'<svg xmlns="%s" xmlns:xlink="%s"/>' % (SVG_NS, XLINK_NS),
|
f'<svg xmlns="{SVG_NS}" xmlns:xlink="{XLINK_NS}"/>',
|
||||||
parser=etree.XMLParser(
|
parser=etree.XMLParser(
|
||||||
recover=True, no_network=True, resolve_entities=False
|
recover=True, no_network=True, resolve_entities=False
|
||||||
)
|
)
|
||||||
@ -43,14 +42,14 @@ def merge():
|
|||||||
recover=True, no_network=True, resolve_entities=False
|
recover=True, no_network=True, resolve_entities=False
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
symbol = ans.makeelement('{%s}symbol' % SVG_NS)
|
symbol = ans.makeelement('{%s}symbol' % SVG_NS) # noqa: UP031
|
||||||
symbol.set('viewBox', svg.get('viewBox'))
|
symbol.set('viewBox', svg.get('viewBox'))
|
||||||
symbol.set('id', 'icon-' + f.rpartition('.')[0])
|
symbol.set('id', 'icon-' + f.rpartition('.')[0])
|
||||||
for child in svg.iterchildren('*'):
|
for child in svg.iterchildren('*'):
|
||||||
clone_node(child, symbol)
|
clone_node(child, symbol)
|
||||||
ans.append(symbol)
|
ans.append(symbol)
|
||||||
ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False)
|
ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False)
|
||||||
ans = re.sub('<svg[^>]+>', '<svg style="display:none">', ans, count=1)
|
ans = re.sub(r'<svg[^>]+>', '<svg style="display:none">', ans, count=1)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# calibre documentation build configuration file, created by
|
# calibre documentation build configuration file, created by
|
||||||
# sphinx-quickstart.py on Sun Mar 23 01:23:55 2008.
|
# sphinx-quickstart.py on Sun Mar 23 01:23:55 2008.
|
||||||
#
|
#
|
||||||
@ -47,11 +45,11 @@ templates_path = ['templates']
|
|||||||
source_suffix = {'.rst': 'restructuredtext'}
|
source_suffix = {'.rst': 'restructuredtext'}
|
||||||
|
|
||||||
# The master toctree document.
|
# The master toctree document.
|
||||||
master_doc = 'index' if tags.has('online') else 'simple_index' # noqa
|
master_doc = 'index' if tags.has('online') else 'simple_index' # noqa: F821
|
||||||
# kill the warning about index/simple_index not being in a toctree
|
# kill the warning about index/simple_index not being in a toctree
|
||||||
exclude_patterns = ['simple_index.rst'] if master_doc == 'index' else ['index.rst']
|
exclude_patterns = ['simple_index.rst'] if master_doc == 'index' else ['index.rst']
|
||||||
exclude_patterns.append('cli-options-header.rst')
|
exclude_patterns.append('cli-options-header.rst')
|
||||||
if tags.has('gettext'): # noqa
|
if tags.has('gettext'): # noqa: F821
|
||||||
# Do not exclude anything as the strings must be translated. This will
|
# Do not exclude anything as the strings must be translated. This will
|
||||||
# generate a warning about the documents not being in a toctree, just ignore
|
# generate a warning about the documents not being in a toctree, just ignore
|
||||||
# it.
|
# it.
|
||||||
@ -64,7 +62,7 @@ language = os.environ.get('CALIBRE_OVERRIDE_LANG', 'en')
|
|||||||
def generated_langs():
|
def generated_langs():
|
||||||
try:
|
try:
|
||||||
return os.listdir(os.path.join(base, 'generated'))
|
return os.listdir(os.path.join(base, 'generated'))
|
||||||
except EnvironmentError as e:
|
except OSError as e:
|
||||||
if e.errno != errno.ENOENT:
|
if e.errno != errno.ENOENT:
|
||||||
raise
|
raise
|
||||||
return ()
|
return ()
|
||||||
@ -99,13 +97,13 @@ today_fmt = '%B %d, %Y'
|
|||||||
unused_docs = ['global', 'cli/global']
|
unused_docs = ['global', 'cli/global']
|
||||||
|
|
||||||
locale_dirs = ['locale/']
|
locale_dirs = ['locale/']
|
||||||
title = '%s User Manual' % __appname__
|
title = f'{__appname__} User Manual'
|
||||||
needs_localization = language not in {'en', 'eng'}
|
needs_localization = language not in {'en', 'eng'}
|
||||||
if needs_localization:
|
if needs_localization:
|
||||||
import gettext
|
import gettext
|
||||||
try:
|
try:
|
||||||
t = gettext.translation('simple_index', locale_dirs[0], [language])
|
t = gettext.translation('simple_index', locale_dirs[0], [language])
|
||||||
except IOError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
title = t.gettext(title)
|
title = t.gettext(title)
|
||||||
@ -176,7 +174,7 @@ def sort_languages(x):
|
|||||||
lc, name = x
|
lc, name = x
|
||||||
if lc == language:
|
if lc == language:
|
||||||
return ''
|
return ''
|
||||||
return sort_key(type(u'')(name))
|
return sort_key(str(name))
|
||||||
|
|
||||||
|
|
||||||
website = 'https://calibre-ebook.com'
|
website = 'https://calibre-ebook.com'
|
||||||
@ -193,13 +191,13 @@ extlinks = {
|
|||||||
}
|
}
|
||||||
del sort_languages, get_language
|
del sort_languages, get_language
|
||||||
|
|
||||||
epub_author = u'Kovid Goyal'
|
epub_author = 'Kovid Goyal'
|
||||||
epub_publisher = u'Kovid Goyal'
|
epub_publisher = 'Kovid Goyal'
|
||||||
epub_copyright = u'© {} Kovid Goyal'.format(date.today().year)
|
epub_copyright = f'© {date.today().year} Kovid Goyal'
|
||||||
epub_description = u'Comprehensive documentation for calibre'
|
epub_description = 'Comprehensive documentation for calibre'
|
||||||
epub_identifier = u'https://manual.calibre-ebook.com'
|
epub_identifier = 'https://manual.calibre-ebook.com'
|
||||||
epub_scheme = u'url'
|
epub_scheme = 'url'
|
||||||
epub_uid = u'S54a88f8e9d42455e9c6db000e989225f'
|
epub_uid = 'S54a88f8e9d42455e9c6db000e989225f'
|
||||||
epub_tocdepth = 4
|
epub_tocdepth = 4
|
||||||
epub_tocdup = True
|
epub_tocdup = True
|
||||||
epub_cover = ('epub_cover.jpg', 'epub_cover_template.html')
|
epub_cover = ('epub_cover.jpg', 'epub_cover_template.html')
|
||||||
@ -255,5 +253,5 @@ latex_show_pagerefs = True
|
|||||||
latex_show_urls = 'footnote'
|
latex_show_urls = 'footnote'
|
||||||
latex_elements = {
|
latex_elements = {
|
||||||
'papersize':'letterpaper',
|
'papersize':'letterpaper',
|
||||||
'preamble': r'\renewcommand{\pageautorefname}{%s}' % _('page'),
|
'preamble': r'\renewcommand{\pageautorefname}{%s}' % _('page'), # noqa: UP031
|
||||||
}
|
}
|
||||||
|
@ -33,7 +33,7 @@ def formatter_funcs():
|
|||||||
|
|
||||||
ans = {'doc': {}, 'sum': {}}
|
ans = {'doc': {}, 'sum': {}}
|
||||||
with TemporaryDirectory() as tdir:
|
with TemporaryDirectory() as tdir:
|
||||||
db = LibraryDatabase(tdir) # needed to load formatter_funcs
|
db = LibraryDatabase(tdir) # needed to load formatter_funcs
|
||||||
ffml = FFMLProcessor()
|
ffml = FFMLProcessor()
|
||||||
all_funcs = formatter_functions().get_builtins()
|
all_funcs = formatter_functions().get_builtins()
|
||||||
for func_name, func in all_funcs.items():
|
for func_name, func in all_funcs.items():
|
||||||
@ -195,13 +195,13 @@ details and examples.
|
|||||||
lines = []
|
lines = []
|
||||||
for cmd in COMMANDS:
|
for cmd in COMMANDS:
|
||||||
parser = option_parser_for(cmd)()
|
parser = option_parser_for(cmd)()
|
||||||
lines += ['.. _calibredb-%s-%s:' % (language, cmd), '']
|
lines += [f'.. _calibredb-{language}-{cmd}:', '']
|
||||||
lines += [cmd, '~'*20, '']
|
lines += [cmd, '~'*20, '']
|
||||||
usage = parser.usage.strip()
|
usage = parser.usage.strip()
|
||||||
usage = [i for i in usage.replace('%prog', 'calibredb').splitlines()]
|
usage = usage.replace('%prog', 'calibredb').splitlines()
|
||||||
cmdline = ' '+usage[0]
|
cmdline = ' '+usage[0]
|
||||||
usage = usage[1:]
|
usage = usage[1:]
|
||||||
usage = [re.sub(r'(%s)([^a-zA-Z0-9])'%cmd, r':command:`\1`\2', i) for i in usage]
|
usage = [re.sub(rf'({cmd})([^a-zA-Z0-9])', r':command:`\1`\2', i) for i in usage]
|
||||||
lines += ['.. code-block:: none', '', cmdline, '']
|
lines += ['.. code-block:: none', '', cmdline, '']
|
||||||
lines += usage
|
lines += usage
|
||||||
groups = [(None, None, parser.option_list)]
|
groups = [(None, None, parser.option_list)]
|
||||||
@ -240,14 +240,14 @@ def generate_ebook_convert_help(preamble, app):
|
|||||||
parser, plumber = create_option_parser(['ebook-convert',
|
parser, plumber = create_option_parser(['ebook-convert',
|
||||||
'dummyi.'+sorted(pl.file_types)[0], 'dummyo.epub', '-h'], default_log)
|
'dummyi.'+sorted(pl.file_types)[0], 'dummyo.epub', '-h'], default_log)
|
||||||
groups = [(pl.name+ ' Options', '', g.option_list) for g in
|
groups = [(pl.name+ ' Options', '', g.option_list) for g in
|
||||||
parser.option_groups if g.title == "INPUT OPTIONS"]
|
parser.option_groups if g.title == 'INPUT OPTIONS']
|
||||||
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
|
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
|
||||||
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
|
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
|
||||||
for pl in sorted(output_format_plugins(), key=lambda x: x.name):
|
for pl in sorted(output_format_plugins(), key=lambda x: x.name):
|
||||||
parser, plumber = create_option_parser(['ebook-convert', 'd.epub',
|
parser, plumber = create_option_parser(['ebook-convert', 'd.epub',
|
||||||
'dummyi.'+pl.file_type, '-h'], default_log)
|
'dummyi.'+pl.file_type, '-h'], default_log)
|
||||||
groups = [(pl.name+ ' Options', '', g.option_list) for g in
|
groups = [(pl.name+ ' Options', '', g.option_list) for g in
|
||||||
parser.option_groups if g.title == "OUTPUT OPTIONS"]
|
parser.option_groups if g.title == 'OUTPUT OPTIONS']
|
||||||
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
|
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
|
||||||
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
|
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
|
||||||
|
|
||||||
@ -257,7 +257,7 @@ def generate_ebook_convert_help(preamble, app):
|
|||||||
def update_cli_doc(name, raw, language):
|
def update_cli_doc(name, raw, language):
|
||||||
if isinstance(raw, bytes):
|
if isinstance(raw, bytes):
|
||||||
raw = raw.decode('utf-8')
|
raw = raw.decode('utf-8')
|
||||||
path = 'generated/%s/%s.rst' % (language, name)
|
path = f'generated/{language}/{name}.rst'
|
||||||
old_raw = open(path, encoding='utf-8').read() if os.path.exists(path) else ''
|
old_raw = open(path, encoding='utf-8').read() if os.path.exists(path) else ''
|
||||||
if not os.path.exists(path) or old_raw != raw:
|
if not os.path.exists(path) or old_raw != raw:
|
||||||
import difflib
|
import difflib
|
||||||
@ -352,7 +352,7 @@ def cli_docs(language):
|
|||||||
usage = [mark_options(i) for i in parser.usage.replace('%prog', cmd).splitlines()]
|
usage = [mark_options(i) for i in parser.usage.replace('%prog', cmd).splitlines()]
|
||||||
cmdline = usage[0]
|
cmdline = usage[0]
|
||||||
usage = usage[1:]
|
usage = usage[1:]
|
||||||
usage = [i.replace(cmd, ':command:`%s`'%cmd) for i in usage]
|
usage = [i.replace(cmd, f':command:`{cmd}`') for i in usage]
|
||||||
usage = '\n'.join(usage)
|
usage = '\n'.join(usage)
|
||||||
preamble = CLI_PREAMBLE.format(cmd=cmd, cmdref=cmd + '-' + language, cmdline=cmdline, usage=usage)
|
preamble = CLI_PREAMBLE.format(cmd=cmd, cmdref=cmd + '-' + language, cmdline=cmdline, usage=usage)
|
||||||
if cmd == 'ebook-convert':
|
if cmd == 'ebook-convert':
|
||||||
@ -382,7 +382,7 @@ def template_docs(language):
|
|||||||
|
|
||||||
def localized_path(app, langcode, pagename):
|
def localized_path(app, langcode, pagename):
|
||||||
href = app.builder.get_target_uri(pagename)
|
href = app.builder.get_target_uri(pagename)
|
||||||
href = re.sub(r'generated/[a-z]+/', 'generated/%s/' % langcode, href)
|
href = re.sub(r'generated/[a-z]+/', f'generated/{langcode}/', href)
|
||||||
prefix = '/'
|
prefix = '/'
|
||||||
if langcode != 'en':
|
if langcode != 'en':
|
||||||
prefix += langcode + '/'
|
prefix += langcode + '/'
|
||||||
@ -397,7 +397,7 @@ def add_html_context(app, pagename, templatename, context, *args):
|
|||||||
|
|
||||||
def guilabel_role(typ, rawtext, text, *args, **kwargs):
|
def guilabel_role(typ, rawtext, text, *args, **kwargs):
|
||||||
from sphinx.roles import GUILabel
|
from sphinx.roles import GUILabel
|
||||||
text = text.replace(u'->', u'\N{THIN SPACE}\N{RIGHTWARDS ARROW}\N{THIN SPACE}')
|
text = text.replace('->', '\N{THIN SPACE}\N{RIGHTWARDS ARROW}\N{THIN SPACE}')
|
||||||
return GUILabel()(typ, rawtext, text, *args, **kwargs)
|
return GUILabel()(typ, rawtext, text, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
@ -405,7 +405,7 @@ def setup_man_pages(app):
|
|||||||
documented_cmds = get_cli_docs()[0]
|
documented_cmds = get_cli_docs()[0]
|
||||||
man_pages = []
|
man_pages = []
|
||||||
for cmd, option_parser in documented_cmds:
|
for cmd, option_parser in documented_cmds:
|
||||||
path = 'generated/%s/%s' % (app.config.language, cmd)
|
path = f'generated/{app.config.language}/{cmd}'
|
||||||
man_pages.append((
|
man_pages.append((
|
||||||
path, cmd, cmd, 'Kovid Goyal', 1
|
path, cmd, cmd, 'Kovid Goyal', 1
|
||||||
))
|
))
|
||||||
|
@ -49,8 +49,8 @@ class EPUBHelpBuilder(EpubBuilder):
|
|||||||
imgname = container.href_to_name(img.get('src'), name)
|
imgname = container.href_to_name(img.get('src'), name)
|
||||||
fmt, width, height = identify(container.raw_data(imgname))
|
fmt, width, height = identify(container.raw_data(imgname))
|
||||||
if width == -1:
|
if width == -1:
|
||||||
raise ValueError('Failed to read size of: %s' % imgname)
|
raise ValueError(f'Failed to read size of: {imgname}')
|
||||||
img.set('style', 'width: %dpx; height: %dpx' % (width, height))
|
img.set('style', f'width: {width}px; height: {height}px')
|
||||||
|
|
||||||
def fix_opf(self, container):
|
def fix_opf(self, container):
|
||||||
spine_names = {n for n, l in container.spine_names}
|
spine_names = {n for n, l in container.spine_names}
|
||||||
@ -75,7 +75,7 @@ class EPUBHelpBuilder(EpubBuilder):
|
|||||||
|
|
||||||
# Ensure that the cover-image property is set
|
# Ensure that the cover-image property is set
|
||||||
cover_id = rmap['_static/' + self.config.epub_cover[0]]
|
cover_id = rmap['_static/' + self.config.epub_cover[0]]
|
||||||
for item in container.opf_xpath('//opf:item[@id="{}"]'.format(cover_id)):
|
for item in container.opf_xpath(f'//opf:item[@id="{cover_id}"]'):
|
||||||
item.set('properties', 'cover-image')
|
item.set('properties', 'cover-image')
|
||||||
for item in container.opf_xpath('//opf:item[@href="epub-cover.xhtml"]'):
|
for item in container.opf_xpath('//opf:item[@href="epub-cover.xhtml"]'):
|
||||||
item.set('properties', 'svg calibre:title-page')
|
item.set('properties', 'svg calibre:title-page')
|
||||||
|
@ -32,7 +32,7 @@ class DemoTool(Tool):
|
|||||||
def create_action(self, for_toolbar=True):
|
def create_action(self, for_toolbar=True):
|
||||||
# Create an action, this will be added to the plugins toolbar and
|
# Create an action, this will be added to the plugins toolbar and
|
||||||
# the plugins menu
|
# the plugins menu
|
||||||
ac = QAction(get_icons('images/icon.png'), 'Magnify fonts', self.gui) # noqa
|
ac = QAction(get_icons('images/icon.png'), 'Magnify fonts', self.gui) # noqa: F821
|
||||||
if not for_toolbar:
|
if not for_toolbar:
|
||||||
# Register a keyboard shortcut for this toolbar action. We only
|
# Register a keyboard shortcut for this toolbar action. We only
|
||||||
# register it for the action created for the menu, not the toolbar,
|
# register it for the action created for the menu, not the toolbar,
|
||||||
|
@ -13,13 +13,13 @@ from calibre.customize import FileTypePlugin
|
|||||||
|
|
||||||
class HelloWorld(FileTypePlugin):
|
class HelloWorld(FileTypePlugin):
|
||||||
|
|
||||||
name = 'Hello World Plugin' # Name of the plugin
|
name = 'Hello World Plugin' # Name of the plugin
|
||||||
description = 'Set the publisher to Hello World for all new conversions'
|
description = 'Set the publisher to Hello World for all new conversions'
|
||||||
supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on
|
supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on
|
||||||
author = 'Acme Inc.' # The author of this plugin
|
author = 'Acme Inc.' # The author of this plugin
|
||||||
version = (1, 0, 0) # The version number of this plugin
|
version = (1, 0, 0) # The version number of this plugin
|
||||||
file_types = {'epub', 'mobi'} # The file types that this plugin will be applied to
|
file_types = {'epub', 'mobi'} # The file types that this plugin will be applied to
|
||||||
on_postprocess = True # Run this plugin after conversion is complete
|
on_postprocess = True # Run this plugin after conversion is complete
|
||||||
minimum_calibre_version = (0, 7, 53)
|
minimum_calibre_version = (0, 7, 53)
|
||||||
|
|
||||||
def run(self, path_to_ebook):
|
def run(self, path_to_ebook):
|
||||||
|
@ -76,5 +76,3 @@ class InterfacePluginDemo(InterfaceActionBase):
|
|||||||
ac = self.actual_plugin_
|
ac = self.actual_plugin_
|
||||||
if ac is not None:
|
if ac is not None:
|
||||||
ac.apply_settings()
|
ac.apply_settings()
|
||||||
|
|
||||||
|
|
||||||
|
@ -55,7 +55,7 @@ class DemoDialog(QDialog):
|
|||||||
self.l.addWidget(self.view_button)
|
self.l.addWidget(self.view_button)
|
||||||
|
|
||||||
self.update_metadata_button = QPushButton(
|
self.update_metadata_button = QPushButton(
|
||||||
'Update metadata in a book\'s files', self)
|
"Update metadata in a book's files", self)
|
||||||
self.update_metadata_button.clicked.connect(self.update_metadata)
|
self.update_metadata_button.clicked.connect(self.update_metadata)
|
||||||
self.l.addWidget(self.update_metadata_button)
|
self.l.addWidget(self.update_metadata_button)
|
||||||
|
|
||||||
|
@ -54,8 +54,8 @@ class checkbox(nodes.Element):
|
|||||||
def visit_checkbox(self, node):
|
def visit_checkbox(self, node):
|
||||||
cid = node['ids'][0]
|
cid = node['ids'][0]
|
||||||
node['classes'] = []
|
node['classes'] = []
|
||||||
self.body.append('<input id="{0}" type="checkbox" />'
|
self.body.append(f'<input id="{cid}" type="checkbox" />'
|
||||||
'<label for="{0}"> </label>'.format(cid))
|
f'<label for="{cid}"> </label>')
|
||||||
|
|
||||||
|
|
||||||
def modify_li(li):
|
def modify_li(li):
|
||||||
@ -66,7 +66,7 @@ def modify_li(li):
|
|||||||
li['classes'].append('leaf-node')
|
li['classes'].append('leaf-node')
|
||||||
else:
|
else:
|
||||||
c = checkbox()
|
c = checkbox()
|
||||||
c['ids'] = ['collapse-checkbox-{}'.format(next(id_counter))]
|
c['ids'] = [f'collapse-checkbox-{next(id_counter)}']
|
||||||
li.insert(0, c)
|
li.insert(0, c)
|
||||||
|
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ def generate_template_language_help(language, log):
|
|||||||
a = output.append
|
a = output.append
|
||||||
|
|
||||||
with TemporaryDirectory() as tdir:
|
with TemporaryDirectory() as tdir:
|
||||||
db = LibraryDatabase(tdir) # needed to load formatter_funcs
|
db = LibraryDatabase(tdir) # needed to load formatter_funcs
|
||||||
ffml = FFMLProcessor()
|
ffml = FFMLProcessor()
|
||||||
all_funcs = formatter_functions().get_builtins()
|
all_funcs = formatter_functions().get_builtins()
|
||||||
categories = defaultdict(dict)
|
categories = defaultdict(dict)
|
||||||
@ -89,5 +89,6 @@ def generate_template_language_help(language, log):
|
|||||||
a(POSTAMBLE)
|
a(POSTAMBLE)
|
||||||
return ''.join(output)
|
return ''.join(output)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
generate_template_language_help()
|
generate_template_language_help()
|
||||||
|
@ -21,10 +21,11 @@ quote-style = 'single'
|
|||||||
|
|
||||||
[tool.ruff.lint]
|
[tool.ruff.lint]
|
||||||
ignore = ['E402', 'E722', 'E741']
|
ignore = ['E402', 'E722', 'E741']
|
||||||
select = ['E', 'F', 'I', 'W', 'INT']
|
select = ['E', 'F', 'I', 'W', 'INT', 'PIE794']
|
||||||
|
unfixable = ['PIE794']
|
||||||
|
|
||||||
[tool.ruff.lint.per-file-ignores]
|
[tool.ruff.lint.per-file-ignores]
|
||||||
"src/calibre/ebooks/unihandecode/*codepoints.py" = ['E501', 'W191']
|
"src/calibre/ebooks/unihandecode/*codepoints.py" = ['E501']
|
||||||
"src/qt/*.py" = ['I']
|
"src/qt/*.py" = ['I']
|
||||||
"src/qt/*.pyi" = ['I']
|
"src/qt/*.pyi" = ['I']
|
||||||
|
|
||||||
|
@ -17,6 +17,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
use_archive = True
|
use_archive = True
|
||||||
|
|
||||||
|
|
||||||
def E(parent, name, text='', **attrs):
|
def E(parent, name, text='', **attrs):
|
||||||
ans = parent.makeelement(name, **attrs)
|
ans = parent.makeelement(name, **attrs)
|
||||||
ans.text = text
|
ans.text = text
|
||||||
@ -60,8 +61,8 @@ if use_archive:
|
|||||||
data = json.loads(raw)
|
data = json.loads(raw)
|
||||||
body = root.xpath('//body')[0]
|
body = root.xpath('//body')[0]
|
||||||
article = E(body, 'article')
|
article = E(body, 'article')
|
||||||
E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
|
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
|
||||||
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
|
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
|
||||||
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
|
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
|
||||||
try:
|
try:
|
||||||
date = data['dateModified']
|
date = data['dateModified']
|
||||||
@ -95,7 +96,7 @@ else:
|
|||||||
for child in tuple(body):
|
for child in tuple(body):
|
||||||
body.remove(child)
|
body.remove(child)
|
||||||
article = E(body, 'article')
|
article = E(body, 'article')
|
||||||
E(article, 'div', replace_entities(data['subheadline']) , style='color: red; font-size:small; font-weight:bold;')
|
E(article, 'div', replace_entities(data['subheadline']), style='color: red; font-size:small; font-weight:bold;')
|
||||||
E(article, 'h1', replace_entities(data['headline']))
|
E(article, 'h1', replace_entities(data['headline']))
|
||||||
E(article, 'div', replace_entities(data['description']), style='font-style: italic; color:#202020;')
|
E(article, 'div', replace_entities(data['description']), style='font-style: italic; color:#202020;')
|
||||||
if data['dateline'] is None:
|
if data['dateline'] is None:
|
||||||
@ -157,7 +158,7 @@ class Economist(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
|
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
|
||||||
|
|
||||||
__author__ = "Kovid Goyal"
|
__author__ = 'Kovid Goyal'
|
||||||
description = (
|
description = (
|
||||||
'Published since September 1843 to take part in “a severe contest between intelligence, which presses forward, and '
|
'Published since September 1843 to take part in “a severe contest between intelligence, which presses forward, and '
|
||||||
'an unworthy, timid ignorance obstructing our progress.”'
|
'an unworthy, timid ignorance obstructing our progress.”'
|
||||||
@ -170,7 +171,7 @@ class Economist(BasicNewsRecipe):
|
|||||||
resolve_internal_links = True
|
resolve_internal_links = True
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
|
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
|
||||||
dict(attrs={'aria-label': "Article Teaser"}),
|
dict(attrs={'aria-label': 'Article Teaser'}),
|
||||||
dict(attrs={
|
dict(attrs={
|
||||||
'class': [
|
'class': [
|
||||||
'dblClkTrk', 'ec-article-info', 'share_inline_header',
|
'dblClkTrk', 'ec-article-info', 'share_inline_header',
|
||||||
@ -224,13 +225,13 @@ class Economist(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
# return self.economist_test_article()
|
# return self.economist_test_article()
|
||||||
soup = self.index_to_soup('https://www.economist.com/hidden-content/1843magazine-hub')
|
soup = self.index_to_soup('https://www.economist.com/hidden-content/1843magazine-hub')
|
||||||
script_tag = soup.find("script", id="__NEXT_DATA__")
|
script_tag = soup.find('script', id='__NEXT_DATA__')
|
||||||
if script_tag is None:
|
if script_tag is None:
|
||||||
raise ValueError('No script tag with JSON data found in the weeklyedition archive')
|
raise ValueError('No script tag with JSON data found in the weeklyedition archive')
|
||||||
data = json.loads(script_tag.string)
|
data = json.loads(script_tag.string)
|
||||||
content_id = data['props']['pageProps']['content'][0]['tegID'].split('/')[-1]
|
content_id = data['props']['pageProps']['content'][0]['tegID'].split('/')[-1]
|
||||||
query = {
|
query = {
|
||||||
'query': 'query HubsDataQuery($id:String!$size:Int!){canonical(ref:$id){id headline description url{canonical __typename}image{ident{url{canonical __typename}width height __typename}__typename}text(mode:"hub" format:"json")hasPart(size:$size){parts{id title:headline isPartOf{context{title:headline __typename}__typename}hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}__typename}}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa
|
'query': 'query HubsDataQuery($id:String!$size:Int!){canonical(ref:$id){id headline description url{canonical __typename}image{ident{url{canonical __typename}width height __typename}__typename}text(mode:"hub" format:"json")hasPart(size:$size){parts{id title:headline isPartOf{context{title:headline __typename}__typename}hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}__typename}}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa: E501
|
||||||
'operationName': 'HubsDataQuery',
|
'operationName': 'HubsDataQuery',
|
||||||
'variables': '{{"id":"/content/{}","size":40}}'.format(content_id),
|
'variables': '{{"id":"/content/{}","size":40}}'.format(content_id),
|
||||||
}
|
}
|
||||||
@ -247,22 +248,22 @@ class Economist(BasicNewsRecipe):
|
|||||||
self.description = data['description']
|
self.description = data['description']
|
||||||
|
|
||||||
feeds_dict = defaultdict(list)
|
feeds_dict = defaultdict(list)
|
||||||
for part in safe_dict(data, "hasPart", "parts"):
|
for part in safe_dict(data, 'hasPart', 'parts'):
|
||||||
section = part['title']
|
section = part['title']
|
||||||
self.log(section)
|
self.log(section)
|
||||||
for art in safe_dict(part, "hasPart", "parts"):
|
for art in safe_dict(part, 'hasPart', 'parts'):
|
||||||
title = safe_dict(art, "title")
|
title = safe_dict(art, 'title')
|
||||||
desc = safe_dict(art, "rubric") or ''
|
desc = safe_dict(art, 'rubric') or ''
|
||||||
sub = safe_dict(art, "flyTitle") or ''
|
sub = safe_dict(art, 'flyTitle') or ''
|
||||||
if sub and section != sub:
|
if sub and section != sub:
|
||||||
desc = sub + ' :: ' + desc
|
desc = sub + ' :: ' + desc
|
||||||
pt = PersistentTemporaryFile('.html')
|
pt = PersistentTemporaryFile('.html')
|
||||||
pt.write(json.dumps(art).encode('utf-8'))
|
pt.write(json.dumps(art).encode('utf-8'))
|
||||||
pt.close()
|
pt.close()
|
||||||
url = 'file:///' + pt.name
|
url = 'file:///' + pt.name
|
||||||
feeds_dict[section].append({"title": title, "url": url, "description": desc})
|
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
|
||||||
self.log('\t', title, '\n\t\t', desc)
|
self.log('\t', title, '\n\t\t', desc)
|
||||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
return list(feeds_dict.items())
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
article.url = soup.find('h1')['title']
|
article.url = soup.find('h1')['title']
|
||||||
@ -273,7 +274,7 @@ class Economist(BasicNewsRecipe):
|
|||||||
'economist.com/cdn-cgi/image/width=600,quality=80,format=auto/')
|
'economist.com/cdn-cgi/image/width=600,quality=80,format=auto/')
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
else: # Load articles from individual article pages {{{
|
else: # Load articles from individual article pages {{{
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
||||||
@ -311,26 +312,26 @@ class Economist(BasicNewsRecipe):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def economist_parse_index(self, soup):
|
def economist_parse_index(self, soup):
|
||||||
script_tag = soup.find("script", id="__NEXT_DATA__")
|
script_tag = soup.find('script', id='__NEXT_DATA__')
|
||||||
if script_tag is not None:
|
if script_tag is not None:
|
||||||
data = json.loads(script_tag.string)
|
data = json.loads(script_tag.string)
|
||||||
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
|
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
|
||||||
self.title = safe_dict(data, "props", "pageProps", "content", "headline")
|
self.title = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
|
||||||
# self.cover_url = 'https://mma.prnewswire.com/media/2275620/The_Economist_The_World_Ahead_2024.jpg?w=600'
|
# self.cover_url = 'https://mma.prnewswire.com/media/2275620/The_Economist_The_World_Ahead_2024.jpg?w=600'
|
||||||
|
|
||||||
feeds = []
|
feeds = []
|
||||||
|
|
||||||
for coll in safe_dict(data, "props", "pageProps", "content", "collections"):
|
for coll in safe_dict(data, 'props', 'pageProps', 'content', 'collections'):
|
||||||
section = safe_dict(coll, "headline") or ''
|
section = safe_dict(coll, 'headline') or ''
|
||||||
self.log(section)
|
self.log(section)
|
||||||
articles = []
|
articles = []
|
||||||
for part in safe_dict(coll, "hasPart", "parts"):
|
for part in safe_dict(coll, 'hasPart', 'parts'):
|
||||||
title = safe_dict(part, "headline") or ''
|
title = safe_dict(part, 'headline') or ''
|
||||||
url = safe_dict(part, "url", "canonical") or ''
|
url = safe_dict(part, 'url', 'canonical') or ''
|
||||||
if not title or not url:
|
if not title or not url:
|
||||||
continue
|
continue
|
||||||
desc = safe_dict(part, "description") or ''
|
desc = safe_dict(part, 'description') or ''
|
||||||
sub = safe_dict(part, "subheadline") or ''
|
sub = safe_dict(part, 'subheadline') or ''
|
||||||
if sub:
|
if sub:
|
||||||
desc = sub + ' :: ' + desc
|
desc = sub + ' :: ' + desc
|
||||||
self.log('\t', title, '\n\t', desc, '\n\t\t', url)
|
self.log('\t', title, '\n\t', desc, '\n\t\t', url)
|
||||||
@ -341,7 +342,6 @@ class Economist(BasicNewsRecipe):
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw, url):
|
def preprocess_raw_html(self, raw, url):
|
||||||
# open('/t/raw.html', 'wb').write(raw.encode('utf-8'))
|
# open('/t/raw.html', 'wb').write(raw.encode('utf-8'))
|
||||||
if use_archive:
|
if use_archive:
|
||||||
@ -358,9 +358,9 @@ class Economist(BasicNewsRecipe):
|
|||||||
cleanup_html_article(root)
|
cleanup_html_article(root)
|
||||||
|
|
||||||
if '/interactive/' in url:
|
if '/interactive/' in url:
|
||||||
return '<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' \
|
return ('<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>'
|
||||||
+ 'This article is supposed to be read in a browser' \
|
'This article is supposed to be read in a browser.'
|
||||||
+ '</em></article></body></html>'
|
'</em></article></body></html>')
|
||||||
|
|
||||||
for div in root.xpath('//div[@class="lazy-image"]'):
|
for div in root.xpath('//div[@class="lazy-image"]'):
|
||||||
noscript = list(div.iter('noscript'))
|
noscript = list(div.iter('noscript'))
|
||||||
|
@ -36,22 +36,21 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
|
|||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']}) # noqa
|
dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']}) # noqa: E501
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']})
|
remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']})
|
||||||
remove_tags_after = dict(
|
remove_tags_after = dict(name='div', attrs={'class': ['related-news', 'col']})
|
||||||
name='div', attrs={'class': ['related-news', 'col']})
|
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict( name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa
|
dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict(name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa: E501
|
||||||
]
|
]
|
||||||
|
|
||||||
extra_css = """
|
extra_css = '''
|
||||||
p{text-align: justify; font-size: 100%}
|
p{text-align: justify; font-size: 100%}
|
||||||
body{ text-align: left; font-size:100% }
|
body{ text-align: left; font-size:100% }
|
||||||
h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
||||||
"""
|
'''
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(
|
preprocess_regexps = [(re.compile(
|
||||||
r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
|
r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
|
||||||
|
@ -9,8 +9,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
|||||||
|
|
||||||
def classes(classes):
|
def classes(classes):
|
||||||
q = frozenset(classes.split(' '))
|
q = frozenset(classes.split(' '))
|
||||||
return dict(attrs={
|
return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
|
||||||
|
|
||||||
|
|
||||||
class Minutes(BasicNewsRecipe):
|
class Minutes(BasicNewsRecipe):
|
||||||
|
@ -28,7 +28,7 @@ class DrawAndCook(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = []
|
||||||
for title, url in [
|
for title, url in [
|
||||||
("They Draw and Cook", "http://www.theydrawandcook.com/")
|
('They Draw and Cook', 'http://www.theydrawandcook.com/')
|
||||||
]:
|
]:
|
||||||
articles = self.make_links(url)
|
articles = self.make_links(url)
|
||||||
if articles:
|
if articles:
|
||||||
|
@ -5,11 +5,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
|
|
||||||
class TheMITPressReader(BasicNewsRecipe):
|
class TheMITPressReader(BasicNewsRecipe):
|
||||||
title = "The MIT Press Reader"
|
title = 'The MIT Press Reader'
|
||||||
__author__ = 'yodha8'
|
__author__ = 'yodha8'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
description = ("Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors."
|
description = ('Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors.'
|
||||||
" This recipe pulls articles from the past 7 days.")
|
' This recipe pulls articles from the past 7 days.')
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
@ -38,9 +38,9 @@ class ABCNews(BasicNewsRecipe):
|
|||||||
if d and isinstance(d, str):
|
if d and isinstance(d, str):
|
||||||
self.oldest_article = float(d)
|
self.oldest_article = float(d)
|
||||||
|
|
||||||
# auto_cleanup = True # enable this as a backup option if recipe stops working
|
# auto_cleanup = True # enable this as a backup option if recipe stops working
|
||||||
|
|
||||||
# use_embedded_content = False # if set to true will assume that all the article content is within the feed (i.e. won't try to fetch more data)
|
# use_embedded_content = False # if set to true will assume that all the article content is within the feed (i.e. won't try to fetch more data)
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
@ -57,7 +57,7 @@ class ABCNews(BasicNewsRecipe):
|
|||||||
# Clear out all the unwanted html tags:
|
# Clear out all the unwanted html tags:
|
||||||
# ************************************
|
# ************************************
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
# dict(name='aside', attrs={'name': re.compile(aside_reg_exp, re.IGNORECASE)})
|
# dict(name='aside', attrs={'name': re.compile(aside_reg_exp, re.IGNORECASE)})
|
||||||
{
|
{
|
||||||
'name': ['meta', 'link', 'noscript', 'aside']
|
'name': ['meta', 'link', 'noscript', 'aside']
|
||||||
},
|
},
|
||||||
@ -98,12 +98,12 @@ class ABCNews(BasicNewsRecipe):
|
|||||||
('Health', 'https://www.abc.net.au/news/feed/9167762/rss.xml'),
|
('Health', 'https://www.abc.net.au/news/feed/9167762/rss.xml'),
|
||||||
('Arts and Entertainment', 'https://www.abc.net.au/news/feed/472/rss.xml'),
|
('Arts and Entertainment', 'https://www.abc.net.au/news/feed/472/rss.xml'),
|
||||||
('Fact Check', 'https://www.abc.net.au/news/feed/5306468/rss.xml'),
|
('Fact Check', 'https://www.abc.net.au/news/feed/5306468/rss.xml'),
|
||||||
# ('Adelaide', 'https://www.abc.net.au/news/feed/8057540/rss.xml'), #enable by removing # at start of line
|
# ('Adelaide', 'https://www.abc.net.au/news/feed/8057540/rss.xml'), #enable by removing # at start of line
|
||||||
# ('Brisbane', 'https://www.abc.net.au/news/feed/8053540/rss.xml'), #enable by removing # at start of line
|
# ('Brisbane', 'https://www.abc.net.au/news/feed/8053540/rss.xml'), #enable by removing # at start of line
|
||||||
# ('Canberra', 'https://www.abc.net.au/news/feed/8057234/rss.xml'), #enable by removing # at start of line
|
# ('Canberra', 'https://www.abc.net.au/news/feed/8057234/rss.xml'), #enable by removing # at start of line
|
||||||
# ('Darwin', 'https://www.abc.net.au/news/feed/8057648/rss.xml'), #enable by removing # at start of line
|
# ('Darwin', 'https://www.abc.net.au/news/feed/8057648/rss.xml'), #enable by removing # at start of line
|
||||||
# ('Hobart', 'https://www.abc.net.au/news/feed/8054562/rss.xml'), #enable by removing # at start of line
|
# ('Hobart', 'https://www.abc.net.au/news/feed/8054562/rss.xml'), #enable by removing # at start of line
|
||||||
# ('Melbourne', 'https://www.abc.net.au/news/feed/8057136/rss.xml'), #enable by removing # at start of line
|
# ('Melbourne', 'https://www.abc.net.au/news/feed/8057136/rss.xml'), #enable by removing # at start of line
|
||||||
# ('Perth', 'https://www.abc.net.au/news/feed/8057096/rss.xml'), #enable by removing # at start of line
|
# ('Perth', 'https://www.abc.net.au/news/feed/8057096/rss.xml'), #enable by removing # at start of line
|
||||||
# ('Sydney', 'https://www.abc.net.au/news/feed/8055316/rss.xml'), #enable by removing # at start of line
|
# ('Sydney', 'https://www.abc.net.au/news/feed/8055316/rss.xml'), #enable by removing # at start of line
|
||||||
]
|
]
|
||||||
|
@ -47,13 +47,13 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
|
|||||||
if d and isinstance(d, str):
|
if d and isinstance(d, str):
|
||||||
self.oldest_article = float(d)
|
self.oldest_article = float(d)
|
||||||
|
|
||||||
extra_css = """
|
extra_css = '''
|
||||||
p{text-align: justify; font-size: 100%}
|
p{text-align: justify; font-size: 100%}
|
||||||
body{ text-align: left; font-size:100% }
|
body{ text-align: left; font-size:100% }
|
||||||
h3{font-family: sans-serif; font-size:120%; font-weight:bold; text-align: justify; }
|
h3{font-family: sans-serif; font-size:120%; font-weight:bold; text-align: justify; }
|
||||||
h2{font-family: sans-serif; font-size:100%; font-weight:bold; text-align: justify; }
|
h2{font-family: sans-serif; font-size:100%; font-weight:bold; text-align: justify; }
|
||||||
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
||||||
"""
|
'''
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
|
||||||
|
@ -28,6 +28,6 @@ class Acrimed(BasicNewsRecipe):
|
|||||||
lambda m: '<title>' + m.group(1) + '</title>'),
|
lambda m: '<title>' + m.group(1) + '</title>'),
|
||||||
(re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]
|
(re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]
|
||||||
|
|
||||||
extra_css = """
|
extra_css = '''
|
||||||
.chapo{font-style:italic; margin: 1em 0 0.5em}
|
.chapo{font-style:italic; margin: 1em 0 0.5em}
|
||||||
"""
|
'''
|
||||||
|
@ -85,9 +85,10 @@ class ADRecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
parts = url.split('/')
|
parts = url.split('/')
|
||||||
print_url = 'http://' + parts[2] + '/' + parts[3] + '/' + parts[4] + '/' + parts[5] + '/' \
|
print_url = 'http://' + '/'.join([
|
||||||
+ parts[10] + '/' + parts[7] + '/print/' + \
|
parts[2], parts[3], parts[4], parts[5], parts[10],
|
||||||
parts[8] + '/' + parts[9] + '/' + parts[13]
|
parts[7], 'print', parts[8], parts[9], parts[13],
|
||||||
|
])
|
||||||
|
|
||||||
return print_url
|
return print_url
|
||||||
|
|
||||||
|
@ -33,7 +33,7 @@ class Adevarul(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'}) # noqa
|
dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'}) # noqa: E501
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = [
|
remove_tags_after = [
|
||||||
|
@ -21,7 +21,7 @@ class AdventureGamers(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
INDEX = u'http://www.adventuregamers.com'
|
INDEX = u'http://www.adventuregamers.com'
|
||||||
extra_css = """
|
extra_css = '''
|
||||||
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
|
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
|
||||||
.pageheader_title,.page_title{font-size: xx-large; color: #394128}
|
.pageheader_title,.page_title{font-size: xx-large; color: #394128}
|
||||||
.pageheader_byline{font-size: small; font-weight: bold; color: #394128}
|
.pageheader_byline{font-size: small; font-weight: bold; color: #394128}
|
||||||
@ -32,7 +32,7 @@ class AdventureGamers(BasicNewsRecipe):
|
|||||||
.score_header{font-size: large; color: #50544A}
|
.score_header{font-size: large; color: #50544A}
|
||||||
img{margin-bottom: 1em;}
|
img{margin-bottom: 1em;}
|
||||||
body{font-family: 'Open Sans',Helvetica,Arial,sans-serif}
|
body{font-family: 'Open Sans',Helvetica,Arial,sans-serif}
|
||||||
"""
|
'''
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
|
@ -8,13 +8,14 @@ def absurl(url):
|
|||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
return 'https://www.afr.com' + url
|
return 'https://www.afr.com' + url
|
||||||
|
|
||||||
|
|
||||||
class afr(BasicNewsRecipe):
|
class afr(BasicNewsRecipe):
|
||||||
title = 'Australian Financial Review'
|
title = 'Australian Financial Review'
|
||||||
__author__ = 'unkn0wn'
|
__author__ = 'unkn0wn'
|
||||||
description = (
|
description = (
|
||||||
'For more than 65 years The Australian Financial Review has been the authority on business,'
|
'For more than 65 years The Australian Financial Review has been the authority on business,'
|
||||||
' finance and investment news in Australia. It has a reputation for independent, award-winning '
|
' finance and investment news in Australia. It has a reputation for independent, award-winning '
|
||||||
'journalism and is essential reading for Australia\'s business and investor community.'
|
"journalism and is essential reading for Australia's business and investor community."
|
||||||
)
|
)
|
||||||
masthead_url = 'https://www.nineforbrands.com.au/wp-content/uploads/2020/08/AFR-DHOSP-Logo-black-RGB.png'
|
masthead_url = 'https://www.nineforbrands.com.au/wp-content/uploads/2020/08/AFR-DHOSP-Logo-black-RGB.png'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
@ -24,7 +25,6 @@ class afr(BasicNewsRecipe):
|
|||||||
timefmt = ' [%d %b %Y]'
|
timefmt = ' [%d %b %Y]'
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
|
||||||
remove_attributes = ['style', 'height', 'width']
|
remove_attributes = ['style', 'height', 'width']
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
@ -39,7 +39,7 @@ class afr(BasicNewsRecipe):
|
|||||||
dict(name=['button', 'aside', 'svg']),
|
dict(name=['button', 'aside', 'svg']),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after= [ dict(name='aside', attrs={'id':'stickyContainer'})]
|
remove_tags_after= [dict(name='aside', attrs={'id':'stickyContainer'})]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
#img-cap {font-size:small; text-align:center;}
|
#img-cap {font-size:small; text-align:center;}
|
||||||
|
@ -36,9 +36,9 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
def default_cover(self, cover_file):
|
def default_cover(self, cover_file):
|
||||||
"""
|
'''
|
||||||
Crée une couverture personnalisée avec le logo
|
Crée une couverture personnalisée avec le logo
|
||||||
"""
|
'''
|
||||||
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
|
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
|
||||||
|
|
||||||
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
|
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
|
||||||
@ -54,7 +54,7 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
weekday = french_weekday[wkd]
|
weekday = french_weekday[wkd]
|
||||||
month = french_month[today.month]
|
month = french_month[today.month]
|
||||||
date_str = f"{weekday} {today.day} {month} {today.year}"
|
date_str = f'{weekday} {today.day} {month} {today.year}'
|
||||||
edition = today.strftime('Édition de %Hh')
|
edition = today.strftime('Édition de %Hh')
|
||||||
|
|
||||||
# Image de base
|
# Image de base
|
||||||
|
@ -6,7 +6,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class Agents(BasicNewsRecipe):
|
class Agents(BasicNewsRecipe):
|
||||||
title = u'\u00AB\u0410\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E\u00BB'
|
title = u'\u00AB\u0410\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E\u00BB'
|
||||||
description = u'\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u0430\u043D\u043D\u043E\u0435 \u0432\u044B\u0445\u043E\u0434\u0446\u0430\u043C\u0438 \u0438\u0437 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432\u0430 \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u043C\u043E\u0433\u043E \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u041F\u0440\u043E\u0435\u043A\u0442\u00BB.' # noqa
|
description = u'\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u0430\u043D\u043D\u043E\u0435 \u0432\u044B\u0445\u043E\u0434\u0446\u0430\u043C\u0438 \u0438\u0437 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432\u0430 \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u043C\u043E\u0433\u043E \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u041F\u0440\u043E\u0435\u043A\u0442\u00BB.' # noqa: E501
|
||||||
__author__ = 'bugmen00t'
|
__author__ = 'bugmen00t'
|
||||||
publisher = 'Project Media'
|
publisher = 'Project Media'
|
||||||
publication_type = 'news'
|
publication_type = 'news'
|
||||||
|
@ -32,7 +32,7 @@ class aktualneRecipe(BasicNewsRecipe):
|
|||||||
remove_attributes = []
|
remove_attributes = []
|
||||||
remove_tags_before = dict(name='h1', attrs={'class': ['titulek-clanku']})
|
remove_tags_before = dict(name='h1', attrs={'class': ['titulek-clanku']})
|
||||||
filter_regexps = [r'img.aktualne.centrum.cz']
|
filter_regexps = [r'img.aktualne.centrum.cz']
|
||||||
remove_tags = [dict(name='div', attrs={'id': ['social-bookmark']}),
|
remove_tags = [dict(name='div', attrs={'id': ['social-bookmark']}),
|
||||||
dict(name='div', attrs={'class': ['box1', 'svazane-tagy']}),
|
dict(name='div', attrs={'class': ['box1', 'svazane-tagy']}),
|
||||||
dict(name='div', attrs={'class': 'itemcomment id0'}),
|
dict(name='div', attrs={'class': 'itemcomment id0'}),
|
||||||
dict(name='div', attrs={'class': 'hlavicka'}),
|
dict(name='div', attrs={'class': 'hlavicka'}),
|
||||||
|
@ -21,9 +21,9 @@ class AlJazeera(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
extra_css = """
|
extra_css = '''
|
||||||
body{font-family: Arial,sans-serif}
|
body{font-family: Arial,sans-serif}
|
||||||
"""
|
'''
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment': description, 'tags': category,
|
'comment': description, 'tags': category,
|
||||||
'publisher': publisher, 'language': language
|
'publisher': publisher, 'language': language
|
||||||
@ -55,7 +55,7 @@ class AlJazeera(BasicNewsRecipe):
|
|||||||
u'http://www.aljazeera.com/xml/rss/all.xml')]
|
u'http://www.aljazeera.com/xml/rss/all.xml')]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
artlurl = article.get('link', None)
|
artlurl = article.get('link', None)
|
||||||
return artlurl
|
return artlurl
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
@ -22,7 +22,7 @@ class AlMasryAlyoum(BasicNewsRecipe):
|
|||||||
category = 'News'
|
category = 'News'
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
|
|
||||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } ' # noqa
|
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } ' # noqa: E501
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class': ['article']})
|
dict(name='div', attrs={'class': ['article']})
|
||||||
|
@ -110,7 +110,7 @@ class AlMonitor(BasicNewsRecipe):
|
|||||||
title = title[0:120] + '...'
|
title = title[0:120] + '...'
|
||||||
href = link.get('href')
|
href = link.get('href')
|
||||||
if not href:
|
if not href:
|
||||||
self._p("BAD HREF: " + str(link))
|
self._p('BAD HREF: ' + str(link))
|
||||||
return
|
return
|
||||||
self.queue_article_link(section, href, title)
|
self.queue_article_link(section, href, title)
|
||||||
|
|
||||||
@ -158,7 +158,7 @@ class AlMonitor(BasicNewsRecipe):
|
|||||||
|
|
||||||
age = (datetime.datetime.now() - date).days
|
age = (datetime.datetime.now() - date).days
|
||||||
if (age > self.oldest_article):
|
if (age > self.oldest_article):
|
||||||
return "too old"
|
return 'too old'
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def scrape_article_date(self, soup):
|
def scrape_article_date(self, soup):
|
||||||
@ -174,7 +174,7 @@ class AlMonitor(BasicNewsRecipe):
|
|||||||
def date_from_string(self, datestring):
|
def date_from_string(self, datestring):
|
||||||
try:
|
try:
|
||||||
# eg: Posted September 17, 2014
|
# eg: Posted September 17, 2014
|
||||||
dt = datetime.datetime.strptime(datestring, "Posted %B %d, %Y")
|
dt = datetime.datetime.strptime(datestring, 'Posted %B %d, %Y')
|
||||||
except:
|
except:
|
||||||
dt = None
|
dt = None
|
||||||
|
|
||||||
@ -203,11 +203,10 @@ class AlMonitor(BasicNewsRecipe):
|
|||||||
return self.tag_to_string(n).strip()
|
return self.tag_to_string(n).strip()
|
||||||
|
|
||||||
def _dbg_soup_node(self, node):
|
def _dbg_soup_node(self, node):
|
||||||
s = ' cls: ' + str(node.get('class')).strip() + \
|
return (' cls: ' + str(node.get('class')).strip() +
|
||||||
' id: ' + str(node.get('id')).strip() + \
|
' id: ' + str(node.get('id')).strip() +
|
||||||
' role: ' + str(node.get('role')).strip() + \
|
' role: ' + str(node.get('role')).strip() +
|
||||||
' txt: ' + self.text(node)
|
' txt: ' + self.text(node))
|
||||||
return s
|
|
||||||
|
|
||||||
def _p(self, msg):
|
def _p(self, msg):
|
||||||
curframe = inspect.currentframe()
|
curframe = inspect.currentframe()
|
||||||
|
@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
|
|
||||||
class AlbertMohlersBlog(BasicNewsRecipe):
|
class AlbertMohlersBlog(BasicNewsRecipe):
|
||||||
title = u'Albert Mohler\'s Blog'
|
title = u"Albert Mohler's Blog"
|
||||||
__author__ = 'Peter Grungi'
|
__author__ = 'Peter Grungi'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
oldest_article = 90
|
oldest_article = 90
|
||||||
@ -13,8 +13,7 @@ class AlbertMohlersBlog(BasicNewsRecipe):
|
|||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
cover_url = 'http://www.albertmohler.com/wp-content/themes/albert-mohler-v5/img/logo-am-lg.gif'
|
cover_url = 'http://www.albertmohler.com/wp-content/themes/albert-mohler-v5/img/logo-am-lg.gif'
|
||||||
publisher = 'Albert Mohler'
|
publisher = 'Albert Mohler'
|
||||||
language = 'en'
|
|
||||||
author = 'Albert Mohler'
|
author = 'Albert Mohler'
|
||||||
|
|
||||||
feeds = [(u'Albert Mohler\'s Blog',
|
feeds = [(u"Albert Mohler's Blog",
|
||||||
u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]
|
u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]
|
||||||
|
@ -43,7 +43,7 @@ class ALD(BasicNewsRecipe):
|
|||||||
# Extract a list of dates from the page.
|
# Extract a list of dates from the page.
|
||||||
# Subset this out to the list of target dates for extraction.
|
# Subset this out to the list of target dates for extraction.
|
||||||
date_list = []
|
date_list = []
|
||||||
for div in soup.findAll('div', attrs={'id': "dayheader"}):
|
for div in soup.findAll('div', attrs={'id': 'dayheader'}):
|
||||||
date_list.append(self.tag_to_string(div))
|
date_list.append(self.tag_to_string(div))
|
||||||
date_list_clean = [re.sub(r'[^\w]', ' ', date) for date in date_list]
|
date_list_clean = [re.sub(r'[^\w]', ' ', date) for date in date_list]
|
||||||
date_list_bool = [
|
date_list_bool = [
|
||||||
@ -54,14 +54,14 @@ class ALD(BasicNewsRecipe):
|
|||||||
|
|
||||||
# Process each paragraph one by one.
|
# Process each paragraph one by one.
|
||||||
# Stop when the text of the previous div is not in the target date list.
|
# Stop when the text of the previous div is not in the target date list.
|
||||||
for div in soup.findAll('div', attrs={'class': "mobile-front"}):
|
for div in soup.findAll('div', attrs={'class': 'mobile-front'}):
|
||||||
for p in div.findAll('p'):
|
for p in div.findAll('p'):
|
||||||
if self.tag_to_string(p.findPreviousSibling('div')) in compress_date:
|
if self.tag_to_string(p.findPreviousSibling('div')) in compress_date:
|
||||||
if p.find('a'):
|
if p.find('a'):
|
||||||
title = self.tag_to_string(p)
|
title = self.tag_to_string(p)
|
||||||
link = p.find('a')['href']
|
link = p.find('a')['href']
|
||||||
if self.tag_to_string(p.findPreviousSibling('h3')
|
if self.tag_to_string(p.findPreviousSibling('h3')
|
||||||
) == "Articles of Note":
|
) == 'Articles of Note':
|
||||||
articles_note.append({
|
articles_note.append({
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': link,
|
'url': link,
|
||||||
@ -69,7 +69,7 @@ class ALD(BasicNewsRecipe):
|
|||||||
'date': ''
|
'date': ''
|
||||||
})
|
})
|
||||||
elif self.tag_to_string(p.findPreviousSibling('h3')
|
elif self.tag_to_string(p.findPreviousSibling('h3')
|
||||||
) == "New Books":
|
) == 'New Books':
|
||||||
new_books.append({
|
new_books.append({
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': link,
|
'url': link,
|
||||||
|
@ -29,6 +29,6 @@ class AlejaKomiksu(BasicNewsRecipe):
|
|||||||
def skip_ad_pages(self, soup):
|
def skip_ad_pages(self, soup):
|
||||||
tag = soup.find(attrs={'class': 'rodzaj'})
|
tag = soup.find(attrs={'class': 'rodzaj'})
|
||||||
if tag and tag.a.string.lower().strip() == 'recenzje':
|
if tag and tag.a.string.lower().strip() == 'recenzje':
|
||||||
link = soup.find(text=re.compile('recenzuje'))
|
link = soup.find(text=re.compile(r'recenzuje'))
|
||||||
if link:
|
if link:
|
||||||
return self.index_to_soup(link.parent['href'], raw=True)
|
return self.index_to_soup(link.parent['href'], raw=True)
|
||||||
|
@ -21,7 +21,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
|
|||||||
remove_images = False
|
remove_images = False
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
"""Récupère dynamiquement l'URL de la dernière une depuis MLP"""
|
'''Récupère dynamiquement l'URL de la dernière une depuis MLP'''
|
||||||
br = self.get_browser()
|
br = self.get_browser()
|
||||||
try:
|
try:
|
||||||
# Accéder à la page du magazine sur MLP
|
# Accéder à la page du magazine sur MLP
|
||||||
@ -38,7 +38,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
|
|||||||
self.log('Cover URL found:', cover_url)
|
self.log('Cover URL found:', cover_url)
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
self.log('Aucune couverture trouvée, utilisation de l\'image par défaut')
|
self.log("Aucune couverture trouvée, utilisation de l'image par défaut")
|
||||||
return 'https://www.alternatives-economiques.fr/sites/all/themes/alternatives-economiques-main/assets/logo-alternatives-economiques.svg'
|
return 'https://www.alternatives-economiques.fr/sites/all/themes/alternatives-economiques-main/assets/logo-alternatives-economiques.svg'
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -92,7 +92,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
|
|||||||
display_name = section_name.replace('-', ' ').title()
|
display_name = section_name.replace('-', ' ').title()
|
||||||
articles.append((display_name, feed_articles[:self.max_articles_per_feed]))
|
articles.append((display_name, feed_articles[:self.max_articles_per_feed]))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.log.error(f'Error processing {section_name}: {str(e)}')
|
self.log.error(f'Error processing {section_name}: {e}')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
return articles
|
return articles
|
||||||
@ -133,7 +133,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
|
|||||||
'description': ''
|
'description': ''
|
||||||
})
|
})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.log.error(f'Error getting H1 title for {article_url}: {str(e)}')
|
self.log.error(f'Error getting H1 title for {article_url}: {e}')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
return feed_articles
|
return feed_articles
|
||||||
|
@ -21,11 +21,9 @@ class WwwAltomdata_dk(BasicNewsRecipe):
|
|||||||
resolve_internal_links = True
|
resolve_internal_links = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
language = 'da'
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/feed'),
|
('Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/feed'),
|
||||||
('Kommentarer til Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/comments/feed'),
|
('Kommentarer til Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/comments/feed'),
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ class AM730(BasicNewsRecipe):
|
|||||||
description = 'http://www.am730.com.hk'
|
description = 'http://www.am730.com.hk'
|
||||||
category = 'Chinese, News, Hong Kong'
|
category = 'Chinese, News, Hong Kong'
|
||||||
masthead_url = 'https://upload.wikimedia.org/wikipedia/en/5/58/Am730_Hong_Kong_newspaper_logo.png'
|
masthead_url = 'https://upload.wikimedia.org/wikipedia/en/5/58/Am730_Hong_Kong_newspaper_logo.png'
|
||||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa: E501
|
||||||
remove_tags =[dict(name='div',attrs={'class':'col-xs-12 col-sm-1 col-md-1 share-button'}),
|
remove_tags =[dict(name='div',attrs={'class':'col-xs-12 col-sm-1 col-md-1 share-button'}),
|
||||||
dict(name='div',attrs={'class':'logo-container print-logo'}),
|
dict(name='div',attrs={'class':'logo-container print-logo'}),
|
||||||
dict(name='div',attrs={'id':'galleria'})]
|
dict(name='div',attrs={'id':'galleria'})]
|
||||||
@ -53,12 +53,12 @@ class AM730(BasicNewsRecipe):
|
|||||||
return self.masthead_url
|
return self.masthead_url
|
||||||
|
|
||||||
def getAMSectionArticles(self, sectionName,url):
|
def getAMSectionArticles(self, sectionName,url):
|
||||||
# print sectionName
|
# print(sectionName)
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
articles = []
|
articles = []
|
||||||
for aTag in soup.findAll('a',attrs={'class':'newsimglink'}):
|
for aTag in soup.findAll('a',attrs={'class':'newsimglink'}):
|
||||||
href = aTag.get('href',False)
|
href = aTag.get('href',False)
|
||||||
if not href.encode("utf-8").startswith(url.encode("utf-8")) :
|
if not href.encode('utf-8').startswith(url.encode('utf-8')):
|
||||||
continue # not in same section
|
continue # not in same section
|
||||||
|
|
||||||
title = href.split('/')[-1].split('-')[0]
|
title = href.split('/')[-1].split('-')[0]
|
||||||
@ -67,7 +67,7 @@ class AM730(BasicNewsRecipe):
|
|||||||
print(title)
|
print(title)
|
||||||
try:
|
try:
|
||||||
if articles.index({'title':title,'url':href})>=0:
|
if articles.index({'title':title,'url':href})>=0:
|
||||||
# print 'already added'
|
# print('already added')
|
||||||
continue # already added
|
continue # already added
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
@ -78,7 +78,7 @@ class AM730(BasicNewsRecipe):
|
|||||||
break
|
break
|
||||||
if self.debug:
|
if self.debug:
|
||||||
print(articles)
|
print(articles)
|
||||||
return (sectionName,articles)
|
return sectionName, articles
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
# hard code sections
|
# hard code sections
|
||||||
@ -89,12 +89,12 @@ class AM730(BasicNewsRecipe):
|
|||||||
('體育','https://www.am730.com.hk/news/%E9%AB%94%E8%82%B2'),
|
('體育','https://www.am730.com.hk/news/%E9%AB%94%E8%82%B2'),
|
||||||
('娛樂','https://www.am730.com.hk/news/%E5%A8%9B%E6%A8%82'),
|
('娛樂','https://www.am730.com.hk/news/%E5%A8%9B%E6%A8%82'),
|
||||||
('旅遊.飲食','https://www.am730.com.hk/news/%E6%97%85%E9%81%8A.%E9%A3%B2%E9%A3%9F')
|
('旅遊.飲食','https://www.am730.com.hk/news/%E6%97%85%E9%81%8A.%E9%A3%B2%E9%A3%9F')
|
||||||
] # articles =[]
|
] # articles =[]
|
||||||
SectionsArticles=[]
|
SectionsArticles=[]
|
||||||
for (title, url) in Sections:
|
for title, url in Sections:
|
||||||
if self.debug:
|
if self.debug:
|
||||||
print(title)
|
print(title)
|
||||||
print(url)
|
print(url)
|
||||||
SectionsArticles.append(self.getAMSectionArticles(title,url))
|
SectionsArticles.append(self.getAMSectionArticles(title,url))
|
||||||
# feeds.append(articles[0]['url'])
|
# feeds.append(articles[0]['url'])
|
||||||
return SectionsArticles
|
return SectionsArticles
|
||||||
|
@ -28,9 +28,9 @@ class Ambito(BasicNewsRecipe):
|
|||||||
language = 'es_AR'
|
language = 'es_AR'
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
masthead_url = 'https://www.ambito.com/css-custom/239/images/logo-239-2020v2.svg'
|
masthead_url = 'https://www.ambito.com/css-custom/239/images/logo-239-2020v2.svg'
|
||||||
extra_css = """
|
extra_css = '''
|
||||||
body{font-family: Roboto, sans-serif}
|
body{font-family: Roboto, sans-serif}
|
||||||
"""
|
'''
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment': description,
|
'comment': description,
|
||||||
|
@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class AmericanThinker(BasicNewsRecipe):
|
class AmericanThinker(BasicNewsRecipe):
|
||||||
title = u'American Thinker'
|
title = u'American Thinker'
|
||||||
description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans."
|
description = 'American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans.'
|
||||||
__author__ = 'Walt Anthony'
|
__author__ = 'Walt Anthony'
|
||||||
publisher = 'Thomas Lifson'
|
publisher = 'Thomas Lifson'
|
||||||
category = 'news, politics, USA'
|
category = 'news, politics, USA'
|
||||||
@ -33,7 +33,7 @@ class AmericanThinker(BasicNewsRecipe):
|
|||||||
root = html5lib.parse(
|
root = html5lib.parse(
|
||||||
clean_xml_chars(raw), treebuilder='lxml',
|
clean_xml_chars(raw), treebuilder='lxml',
|
||||||
namespaceHTMLElements=False)
|
namespaceHTMLElements=False)
|
||||||
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa
|
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa: E501
|
||||||
x.getparent().remove(x)
|
x.getparent().remove(x)
|
||||||
return etree.tostring(root, encoding='unicode')
|
return etree.tostring(root, encoding='unicode')
|
||||||
|
|
||||||
|
@ -39,4 +39,4 @@ class anan(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
# return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update
|
# return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update
|
||||||
return url.replace("/show/", "/print/") # 2014-02-27 AGE: update
|
return url.replace('/show/', '/print/') # 2014-02-27 AGE: update
|
||||||
|
@ -12,7 +12,7 @@ class ancientegypt(BasicNewsRecipe):
|
|||||||
language = 'en'
|
language = 'en'
|
||||||
__author__ = 'unkn0wn'
|
__author__ = 'unkn0wn'
|
||||||
description = (
|
description = (
|
||||||
'Ancient Egypt is the world\'s leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. '
|
"Ancient Egypt is the world's leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. "
|
||||||
'Now in a larger format with a fresh new design, AE brings you the latest news and discoveries, and feature articles covering '
|
'Now in a larger format with a fresh new design, AE brings you the latest news and discoveries, and feature articles covering '
|
||||||
'more than 5000 years of Egyptian history. Published bimonthly.'
|
'more than 5000 years of Egyptian history. Published bimonthly.'
|
||||||
)
|
)
|
||||||
|
@ -5,7 +5,7 @@ from datetime import date
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
# figure out your local edition id from the log of this recipe
|
# figure out your local edition id from the log of this recipe
|
||||||
edi_id = 182 # NTR VIJAYAWADA - 182
|
edi_id = 182 # NTR VIJAYAWADA - 182
|
||||||
|
|
||||||
today = date.today().strftime('%d/%m/%Y')
|
today = date.today().strftime('%d/%m/%Y')
|
||||||
|
|
||||||
@ -18,6 +18,7 @@ today = today.replace('/', '%2F')
|
|||||||
|
|
||||||
index = 'https://epaper.andhrajyothy.com'
|
index = 'https://epaper.andhrajyothy.com'
|
||||||
|
|
||||||
|
|
||||||
class andhra(BasicNewsRecipe):
|
class andhra(BasicNewsRecipe):
|
||||||
title = 'ఆంధ్రజ్యోతి - ఆంధ్రప్రదేశ్'
|
title = 'ఆంధ్రజ్యోతి - ఆంధ్రప్రదేశ్'
|
||||||
language = 'te'
|
language = 'te'
|
||||||
@ -50,7 +51,7 @@ class andhra(BasicNewsRecipe):
|
|||||||
self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n')
|
self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n')
|
||||||
for edi in edi_data:
|
for edi in edi_data:
|
||||||
if edi['org_location'] in {'Magazines', 'Navya Daily'}:
|
if edi['org_location'] in {'Magazines', 'Navya Daily'}:
|
||||||
continue
|
continue
|
||||||
self.log(edi['org_location'])
|
self.log(edi['org_location'])
|
||||||
cities = []
|
cities = []
|
||||||
for edi_loc in edi['editionlocation']:
|
for edi_loc in edi['editionlocation']:
|
||||||
@ -75,8 +76,8 @@ class andhra(BasicNewsRecipe):
|
|||||||
url = str(snaps['OrgId'])
|
url = str(snaps['OrgId'])
|
||||||
if snaps['ObjectType'] == 4:
|
if snaps['ObjectType'] == 4:
|
||||||
continue
|
continue
|
||||||
feeds_dict[section].append({"title": '', "url": url})
|
feeds_dict[section].append({'title': '', 'url': url})
|
||||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
return list(feeds_dict.items())
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw, *a):
|
def preprocess_raw_html(self, raw, *a):
|
||||||
data = json.loads(raw)
|
data = json.loads(raw)
|
||||||
|
@ -5,7 +5,7 @@ from datetime import date
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
# figure out your local edition id from the log of this recipe
|
# figure out your local edition id from the log of this recipe
|
||||||
edi_id = 34 # HYDERABAD MAIN I - 34
|
edi_id = 34 # HYDERABAD MAIN I - 34
|
||||||
|
|
||||||
today = date.today().strftime('%d/%m/%Y')
|
today = date.today().strftime('%d/%m/%Y')
|
||||||
|
|
||||||
@ -18,6 +18,7 @@ today = today.replace('/', '%2F')
|
|||||||
|
|
||||||
index = 'https://epaper.andhrajyothy.com'
|
index = 'https://epaper.andhrajyothy.com'
|
||||||
|
|
||||||
|
|
||||||
class andhra(BasicNewsRecipe):
|
class andhra(BasicNewsRecipe):
|
||||||
title = 'ఆంధ్రజ్యోతి - తెలంగాణ'
|
title = 'ఆంధ్రజ్యోతి - తెలంగాణ'
|
||||||
language = 'te'
|
language = 'te'
|
||||||
@ -50,7 +51,7 @@ class andhra(BasicNewsRecipe):
|
|||||||
self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n')
|
self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n')
|
||||||
for edi in edi_data:
|
for edi in edi_data:
|
||||||
if edi['org_location'] in {'Magazines', 'Navya Daily'}:
|
if edi['org_location'] in {'Magazines', 'Navya Daily'}:
|
||||||
continue
|
continue
|
||||||
self.log(edi['org_location'])
|
self.log(edi['org_location'])
|
||||||
cities = []
|
cities = []
|
||||||
for edi_loc in edi['editionlocation']:
|
for edi_loc in edi['editionlocation']:
|
||||||
@ -75,8 +76,8 @@ class andhra(BasicNewsRecipe):
|
|||||||
url = str(snaps['OrgId'])
|
url = str(snaps['OrgId'])
|
||||||
if snaps['ObjectType'] == 4:
|
if snaps['ObjectType'] == 4:
|
||||||
continue
|
continue
|
||||||
feeds_dict[section].append({"title": '', "url": url})
|
feeds_dict[section].append({'title': '', 'url': url})
|
||||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
return list(feeds_dict.items())
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw, *a):
|
def preprocess_raw_html(self, raw, *a):
|
||||||
data = json.loads(raw)
|
data = json.loads(raw)
|
||||||
|
@ -16,5 +16,5 @@ class Android_com_pl(BasicNewsRecipe):
|
|||||||
remove_tags_after = [{'class': 'post-content'}]
|
remove_tags_after = [{'class': 'post-content'}]
|
||||||
remove_tags = [dict(name='ul', attrs={'class': 'tags small-tags'}), dict(name='a', attrs={'onclick': 'return ss_plugin_loadpopup_js(this);'})]
|
remove_tags = [dict(name='ul', attrs={'class': 'tags small-tags'}), dict(name='a', attrs={'onclick': 'return ss_plugin_loadpopup_js(this);'})]
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(u'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
|
(re.compile(r'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
|
||||||
feeds = [(u'Android', u'http://android.com.pl/feed/')]
|
feeds = [(u'Android', u'http://android.com.pl/feed/')]
|
||||||
|
@ -23,36 +23,36 @@ class AdvancedUserRecipe1718384518(BasicNewsRecipe):
|
|||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
#Phones
|
# Phones
|
||||||
('Phones', 'https://www.androidpolice.com/feed/phones/'),
|
('Phones', 'https://www.androidpolice.com/feed/phones/'),
|
||||||
('News about Phones', 'https://www.androidpolice.com/feed/phones-news/'),
|
('News about Phones', 'https://www.androidpolice.com/feed/phones-news/'),
|
||||||
('Guides about Phones', 'https://www.androidpolice.com/feed/phones-guide/'),
|
('Guides about Phones', 'https://www.androidpolice.com/feed/phones-guide/'),
|
||||||
('Phones Features', 'https://www.androidpolice.com/feed/phones-features/'),
|
('Phones Features', 'https://www.androidpolice.com/feed/phones-features/'),
|
||||||
('Phones & Accessory Reviews', 'https://www.androidpolice.com/feed/phones-reviews/'),
|
('Phones & Accessory Reviews', 'https://www.androidpolice.com/feed/phones-reviews/'),
|
||||||
#Google
|
# Google
|
||||||
('Google', 'https://www.androidpolice.com/feed/google/'),
|
('Google', 'https://www.androidpolice.com/feed/google/'),
|
||||||
('News about Google', 'https://www.androidpolice.com/feed/news-google/'),
|
('News about Google', 'https://www.androidpolice.com/feed/news-google/'),
|
||||||
('Google Applications', 'https://www.androidpolice.com/feed/tag/google-app/'),
|
('Google Applications', 'https://www.androidpolice.com/feed/tag/google-app/'),
|
||||||
('Guides about Google', 'https://www.androidpolice.com/feed/guides-google/'),
|
('Guides about Google', 'https://www.androidpolice.com/feed/guides-google/'),
|
||||||
('Features about Google', 'https://www.androidpolice.com/feed/features-google/'),
|
('Features about Google', 'https://www.androidpolice.com/feed/features-google/'),
|
||||||
#Operating Systems
|
# Operating Systems
|
||||||
('Operating Systems', 'https://www.androidpolice.com/feed/operating-systems/'),
|
('Operating Systems', 'https://www.androidpolice.com/feed/operating-systems/'),
|
||||||
('News about Operating Systems', 'https://www.androidpolice.com/feed/news-operating-systems/'),
|
('News about Operating Systems', 'https://www.androidpolice.com/feed/news-operating-systems/'),
|
||||||
('Guides about Operating Systems', 'https://www.androidpolice.com/feed/guides-operating-systems/'),
|
('Guides about Operating Systems', 'https://www.androidpolice.com/feed/guides-operating-systems/'),
|
||||||
('Features on Operating Systems', 'https://www.androidpolice.com/feed/features-operating-systems/'),
|
('Features on Operating Systems', 'https://www.androidpolice.com/feed/features-operating-systems/'),
|
||||||
#Chromebooks
|
# Chromebooks
|
||||||
('Chromebooks', 'https://www.androidpolice.com/feed/laptops/'),
|
('Chromebooks', 'https://www.androidpolice.com/feed/laptops/'),
|
||||||
('News about Chromebooks', 'https://www.androidpolice.com/feed/news-chromebooks/'),
|
('News about Chromebooks', 'https://www.androidpolice.com/feed/news-chromebooks/'),
|
||||||
('Guides about Chromebooks', 'https://www.androidpolice.com/feed/guides-chromebooks/'),
|
('Guides about Chromebooks', 'https://www.androidpolice.com/feed/guides-chromebooks/'),
|
||||||
('Chromebook & Laptop Reviews', 'https://www.androidpolice.com/feed/reviews-chromebooks/'),
|
('Chromebook & Laptop Reviews', 'https://www.androidpolice.com/feed/reviews-chromebooks/'),
|
||||||
#Gadgets
|
# Gadgets
|
||||||
('Gadgets', 'https://www.androidpolice.com/feed/gadgets/'),
|
('Gadgets', 'https://www.androidpolice.com/feed/gadgets/'),
|
||||||
('Smartwatches & Wearables', 'https://www.androidpolice.com/feed/wearables/'),
|
('Smartwatches & Wearables', 'https://www.androidpolice.com/feed/wearables/'),
|
||||||
('Audio', 'https://www.androidpolice.com/feed/tag/audio/'),
|
('Audio', 'https://www.androidpolice.com/feed/tag/audio/'),
|
||||||
('Accessories', 'https://www.androidpolice.com/feed/accessories/'),
|
('Accessories', 'https://www.androidpolice.com/feed/accessories/'),
|
||||||
('Smart Home', 'https://www.androidpolice.com/feed/smart-home/'),
|
('Smart Home', 'https://www.androidpolice.com/feed/smart-home/'),
|
||||||
('Applications & Games', 'https://www.androidpolice.com/feed/applications-games/'),
|
('Applications & Games', 'https://www.androidpolice.com/feed/applications-games/'),
|
||||||
#Reviews
|
# Reviews
|
||||||
('Reviews', 'https://www.androidpolice.com/feed/reviews/'),
|
('Reviews', 'https://www.androidpolice.com/feed/reviews/'),
|
||||||
('Phones & Accessory Reviews', 'https://www.androidpolice.com/feed/phones-reviews/'),
|
('Phones & Accessory Reviews', 'https://www.androidpolice.com/feed/phones-reviews/'),
|
||||||
('Smartwatch & Wearable Reviews', 'https://www.androidpolice.com/feed/wearable-reviews/'),
|
('Smartwatch & Wearable Reviews', 'https://www.androidpolice.com/feed/wearable-reviews/'),
|
||||||
|
@ -32,14 +32,11 @@ class AnimalPolitico(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup('http://www.animalpolitico.com/')
|
soup = self.index_to_soup('http://www.animalpolitico.com/')
|
||||||
articles = []
|
articles = []
|
||||||
for a in soup(**{
|
for a in soup(name='a', attrs={
|
||||||
'name': 'a',
|
|
||||||
'attrs': {
|
|
||||||
'href': True, 'title': True,
|
'href': True, 'title': True,
|
||||||
'data-author': True, 'data-type': True,
|
'data-author': True, 'data-type': True,
|
||||||
'data-home-title': True
|
'data-home-title': True
|
||||||
}
|
}):
|
||||||
}):
|
|
||||||
title = a['title']
|
title = a['title']
|
||||||
url = a['href']
|
url = a['href']
|
||||||
author = a['data-author']
|
author = a['data-author']
|
||||||
|
@ -19,8 +19,6 @@ class AmericanProspect(BasicNewsRecipe):
|
|||||||
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(id=['title', 'content']),
|
dict(id=['title', 'content']),
|
||||||
]
|
]
|
||||||
|
@ -18,8 +18,6 @@ class Arbetaren_SE(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'sv'
|
language = 'sv'
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
auto_cleanup_keep = '//div[@class="thumbnail"]'
|
auto_cleanup_keep = '//div[@class="thumbnail"]|//div[@id="article-image"]|//span[@class="important"]'
|
||||||
auto_cleanup_keep = '//div[@id="article-image"]'
|
|
||||||
auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]'
|
|
||||||
|
|
||||||
feeds = [(u'Nyheter', u'https://www.arbetaren.se/feed')]
|
feeds = [(u'Nyheter', u'https://www.arbetaren.se/feed')]
|
||||||
|
@ -66,19 +66,19 @@ class Arcamax(BasicNewsRecipe):
|
|||||||
# (u"9 Chickweed Lane", u"https://www.arcamax.com/thefunnies/ninechickweedlane"),
|
# (u"9 Chickweed Lane", u"https://www.arcamax.com/thefunnies/ninechickweedlane"),
|
||||||
# (u"Agnes", u"https://www.arcamax.com/thefunnies/agnes"),
|
# (u"Agnes", u"https://www.arcamax.com/thefunnies/agnes"),
|
||||||
# (u"Andy Capp", u"https://www.arcamax.com/thefunnies/andycapp"),
|
# (u"Andy Capp", u"https://www.arcamax.com/thefunnies/andycapp"),
|
||||||
(u"BC", u"https://www.arcamax.com/thefunnies/bc"),
|
(u'BC', u'https://www.arcamax.com/thefunnies/bc'),
|
||||||
# (u"Baby Blues", u"https://www.arcamax.com/thefunnies/babyblues"),
|
# (u"Baby Blues", u"https://www.arcamax.com/thefunnies/babyblues"),
|
||||||
# (u"Beetle Bailey", u"https://www.arcamax.com/thefunnies/beetlebailey"),
|
# (u"Beetle Bailey", u"https://www.arcamax.com/thefunnies/beetlebailey"),
|
||||||
(u"Blondie", u"https://www.arcamax.com/thefunnies/blondie"),
|
(u'Blondie', u'https://www.arcamax.com/thefunnies/blondie'),
|
||||||
# u"Boondocks", u"https://www.arcamax.com/thefunnies/boondocks"),
|
# u"Boondocks", u"https://www.arcamax.com/thefunnies/boondocks"),
|
||||||
# (u"Cathy", u"https://www.arcamax.com/thefunnies/cathy"),
|
# (u"Cathy", u"https://www.arcamax.com/thefunnies/cathy"),
|
||||||
# (u"Daddys Home", u"https://www.arcamax.com/thefunnies/daddyshome"),
|
# (u"Daddys Home", u"https://www.arcamax.com/thefunnies/daddyshome"),
|
||||||
# (u"Dinette Set", u"https://www.arcamax.com/thefunnies/thedinetteset"),
|
# (u"Dinette Set", u"https://www.arcamax.com/thefunnies/thedinetteset"),
|
||||||
(u"Dog Eat Doug", u"https://www.arcamax.com/thefunnies/dogeatdoug"),
|
(u'Dog Eat Doug', u'https://www.arcamax.com/thefunnies/dogeatdoug'),
|
||||||
# (u"Doonesbury", u"https://www.arcamax.com/thefunnies/doonesbury"),
|
# (u"Doonesbury", u"https://www.arcamax.com/thefunnies/doonesbury"),
|
||||||
# (u"Dustin", u"https://www.arcamax.com/thefunnies/dustin"),
|
# (u"Dustin", u"https://www.arcamax.com/thefunnies/dustin"),
|
||||||
(u"Family Circus", u"https://www.arcamax.com/thefunnies/familycircus"),
|
(u'Family Circus', u'https://www.arcamax.com/thefunnies/familycircus'),
|
||||||
(u"Garfield", u"https://www.arcamax.com/thefunnies/garfield"),
|
(u'Garfield', u'https://www.arcamax.com/thefunnies/garfield'),
|
||||||
# (u"Get Fuzzy", u"https://www.arcamax.com/thefunnies/getfuzzy"),
|
# (u"Get Fuzzy", u"https://www.arcamax.com/thefunnies/getfuzzy"),
|
||||||
# (u"Girls and Sports", u"https://www.arcamax.com/thefunnies/girlsandsports"),
|
# (u"Girls and Sports", u"https://www.arcamax.com/thefunnies/girlsandsports"),
|
||||||
# (u"Hagar the Horrible", u"https://www.arcamax.com/thefunnies/hagarthehorrible"),
|
# (u"Hagar the Horrible", u"https://www.arcamax.com/thefunnies/hagarthehorrible"),
|
||||||
@ -87,16 +87,16 @@ class Arcamax(BasicNewsRecipe):
|
|||||||
# (u"Luann", u"https://www.arcamax.com/thefunnies/luann"),
|
# (u"Luann", u"https://www.arcamax.com/thefunnies/luann"),
|
||||||
# (u"Momma", u"https://www.arcamax.com/thefunnies/momma"),
|
# (u"Momma", u"https://www.arcamax.com/thefunnies/momma"),
|
||||||
# (u"Mother Goose and Grimm", u"https://www.arcamax.com/thefunnies/mothergooseandgrimm"),
|
# (u"Mother Goose and Grimm", u"https://www.arcamax.com/thefunnies/mothergooseandgrimm"),
|
||||||
(u"Mutts", u"https://www.arcamax.com/thefunnies/mutts"),
|
(u'Mutts', u'https://www.arcamax.com/thefunnies/mutts'),
|
||||||
# (u"Non Sequitur", u"https://www.arcamax.com/thefunnies/nonsequitur"),
|
# (u"Non Sequitur", u"https://www.arcamax.com/thefunnies/nonsequitur"),
|
||||||
# (u"Pearls Before Swine", u"https://www.arcamax.com/thefunnies/pearlsbeforeswine"),
|
# (u"Pearls Before Swine", u"https://www.arcamax.com/thefunnies/pearlsbeforeswine"),
|
||||||
# (u"Pickles", u"https://www.arcamax.com/thefunnies/pickles"),
|
# (u"Pickles", u"https://www.arcamax.com/thefunnies/pickles"),
|
||||||
# (u"Red and Rover", u"https://www.arcamax.com/thefunnies/redandrover"),
|
# (u"Red and Rover", u"https://www.arcamax.com/thefunnies/redandrover"),
|
||||||
# (u"Rubes", u"https://www.arcamax.com/thefunnies/rubes"),
|
# (u"Rubes", u"https://www.arcamax.com/thefunnies/rubes"),
|
||||||
# (u"Rugrats", u"https://www.arcamax.com/thefunnies/rugrats"),
|
# (u"Rugrats", u"https://www.arcamax.com/thefunnies/rugrats"),
|
||||||
(u"Speed Bump", u"https://www.arcamax.com/thefunnies/speedbump"),
|
(u'Speed Bump', u'https://www.arcamax.com/thefunnies/speedbump'),
|
||||||
(u"Wizard of Id", u"https://www.arcamax.com/thefunnies/wizardofid"),
|
(u'Wizard of Id', u'https://www.arcamax.com/thefunnies/wizardofid'),
|
||||||
(u"Zits", u"https://www.arcamax.com/thefunnies/zits"),
|
(u'Zits', u'https://www.arcamax.com/thefunnies/zits'),
|
||||||
]:
|
]:
|
||||||
self.log('Finding strips for:', title)
|
self.log('Finding strips for:', title)
|
||||||
articles = self.make_links(url, title)
|
articles = self.make_links(url, title)
|
||||||
|
@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class ArretSurImages(BasicNewsRecipe):
|
class ArretSurImages(BasicNewsRecipe):
|
||||||
title = 'Arrêt sur Images'
|
title = 'Arrêt sur Images'
|
||||||
description = 'Site français d\'analyse des médias'
|
description = "Site français d'analyse des médias"
|
||||||
language = 'fr'
|
language = 'fr'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
@ -27,9 +27,9 @@ class ArretSurImages(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def default_cover(self, cover_file):
|
def default_cover(self, cover_file):
|
||||||
"""
|
'''
|
||||||
Crée une couverture personnalisée avec le logo ASI
|
Crée une couverture personnalisée avec le logo ASI
|
||||||
"""
|
'''
|
||||||
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
|
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
|
||||||
|
|
||||||
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
|
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
|
||||||
@ -45,7 +45,7 @@ class ArretSurImages(BasicNewsRecipe):
|
|||||||
|
|
||||||
weekday = french_weekday[wkd]
|
weekday = french_weekday[wkd]
|
||||||
month = french_month[today.month]
|
month = french_month[today.month]
|
||||||
date_str = f"{weekday} {today.day} {month} {today.year}"
|
date_str = f'{weekday} {today.day} {month} {today.year}'
|
||||||
edition = today.strftime('Édition de %Hh')
|
edition = today.strftime('Édition de %Hh')
|
||||||
|
|
||||||
img = QImage(1400, 1920, QImage.Format_RGB888)
|
img = QImage(1400, 1920, QImage.Format_RGB888)
|
||||||
@ -123,9 +123,9 @@ class ArretSurImages(BasicNewsRecipe):
|
|||||||
br.addheaders += [('Authorization', f'Bearer {auth_response["access_token"]}')]
|
br.addheaders += [('Authorization', f'Bearer {auth_response["access_token"]}')]
|
||||||
print('Authentification réussie')
|
print('Authentification réussie')
|
||||||
else:
|
else:
|
||||||
print('Échec de l\'authentification - Vérifiez vos identifiants')
|
print("Échec de l'authentification - Vérifiez vos identifiants")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f'Erreur lors de l\'authentification: {str(e)}')
|
print(f"Erreur lors de l'authentification: {e}")
|
||||||
return br
|
return br
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
@ -162,7 +162,7 @@ class ArretSurImages(BasicNewsRecipe):
|
|||||||
</html>
|
</html>
|
||||||
'''
|
'''
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f'Erreur preprocessing HTML: {str(e)}')
|
print(f'Erreur preprocessing HTML: {e}')
|
||||||
return raw_html
|
return raw_html
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
@ -186,11 +186,11 @@ class ArretSurImages(BasicNewsRecipe):
|
|||||||
else:
|
else:
|
||||||
tag.replace_with(img_tag)
|
tag.replace_with(img_tag)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f'Erreur processing image: {str(e)}')
|
print(f'Erreur processing image: {e}')
|
||||||
tag.decompose()
|
tag.decompose()
|
||||||
else:
|
else:
|
||||||
tag.decompose()
|
tag.decompose()
|
||||||
return soup
|
return soup
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f'Erreur preprocessing HTML: {str(e)}')
|
print(f'Erreur preprocessing HTML: {e}')
|
||||||
return soup
|
return soup
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = "GPL v3"
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = "2022, Albert Aparicio Isarn <aaparicio at posteo.net>"
|
__copyright__ = '2022, Albert Aparicio Isarn <aaparicio at posteo.net>'
|
||||||
|
|
||||||
"""
|
'''
|
||||||
https://www.asahi.com/ajw/
|
https://www.asahi.com/ajw/
|
||||||
"""
|
'''
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
@ -14,99 +14,99 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
|
|
||||||
class AsahiShimbunEnglishNews(BasicNewsRecipe):
|
class AsahiShimbunEnglishNews(BasicNewsRecipe):
|
||||||
title = "The Asahi Shimbun"
|
title = 'The Asahi Shimbun'
|
||||||
__author__ = "Albert Aparicio Isarn"
|
__author__ = 'Albert Aparicio Isarn'
|
||||||
|
|
||||||
description = ("The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan."
|
description = ('The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan.'
|
||||||
" The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive"
|
' The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive'
|
||||||
" coverage of cool Japan,focusing on manga, travel and other timely news.")
|
' coverage of cool Japan,focusing on manga, travel and other timely news.')
|
||||||
publisher = "The Asahi Shimbun Company"
|
publisher = 'The Asahi Shimbun Company'
|
||||||
publication_type = "newspaper"
|
publication_type = 'newspaper'
|
||||||
category = "news, japan"
|
category = 'news, japan'
|
||||||
language = "en_JP"
|
language = 'en_JP'
|
||||||
|
|
||||||
index = "https://www.asahi.com"
|
index = 'https://www.asahi.com'
|
||||||
masthead_url = "https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png"
|
masthead_url = 'https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png'
|
||||||
|
|
||||||
oldest_article = 3
|
oldest_article = 3
|
||||||
max_articles_per_feed = 40
|
max_articles_per_feed = 40
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
remove_tags_before = {"id": "MainInner"}
|
remove_tags_before = {'id': 'MainInner'}
|
||||||
remove_tags_after = {"class": "ArticleText"}
|
remove_tags_after = {'class': 'ArticleText'}
|
||||||
remove_tags = [{"name": "div", "class": "SnsUtilityArea"}]
|
remove_tags = [{'name': 'div', 'class': 'SnsUtilityArea'}]
|
||||||
|
|
||||||
def get_whats_new(self):
|
def get_whats_new(self):
|
||||||
soup = self.index_to_soup(self.index + "/ajw/new")
|
soup = self.index_to_soup(self.index + '/ajw/new')
|
||||||
news_section = soup.find("div", attrs={"class": "specialList"})
|
news_section = soup.find('div', attrs={'class': 'specialList'})
|
||||||
|
|
||||||
new_news = []
|
new_news = []
|
||||||
|
|
||||||
for item in news_section.findAll("li"):
|
for item in news_section.findAll('li'):
|
||||||
title = item.find("p", attrs={"class": "title"}).string
|
title = item.find('p', attrs={'class': 'title'}).string
|
||||||
date_string = item.find("p", attrs={"class": "date"}).next
|
date_string = item.find('p', attrs={'class': 'date'}).next
|
||||||
date = date_string.strip()
|
date = date_string.strip()
|
||||||
url = self.index + item.find("a")["href"]
|
url = self.index + item.find('a')['href']
|
||||||
|
|
||||||
new_news.append(
|
new_news.append(
|
||||||
{
|
{
|
||||||
"title": title,
|
'title': title,
|
||||||
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
|
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
|
||||||
"url": url,
|
'url': url,
|
||||||
"description": "",
|
'description': '',
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
return new_news
|
return new_news
|
||||||
|
|
||||||
def get_top6(self, soup):
|
def get_top6(self, soup):
|
||||||
top = soup.find("ul", attrs={"class": "top6"})
|
top = soup.find('ul', attrs={'class': 'top6'})
|
||||||
|
|
||||||
top6_news = []
|
top6_news = []
|
||||||
|
|
||||||
for item in top.findAll("li"):
|
for item in top.findAll('li'):
|
||||||
title = item.find("p", attrs={"class": "title"}).string
|
title = item.find('p', attrs={'class': 'title'}).string
|
||||||
date_string = item.find("p", attrs={"class": "date"}).next
|
date_string = item.find('p', attrs={'class': 'date'}).next
|
||||||
date = date_string.strip()
|
date = date_string.strip()
|
||||||
url = self.index + item.find("a")["href"]
|
url = self.index + item.find('a')['href']
|
||||||
|
|
||||||
top6_news.append(
|
top6_news.append(
|
||||||
{
|
{
|
||||||
"title": title,
|
'title': title,
|
||||||
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
|
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
|
||||||
"url": url,
|
'url': url,
|
||||||
"description": "",
|
'description': '',
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
return top6_news
|
return top6_news
|
||||||
|
|
||||||
def get_section_news(self, soup):
|
def get_section_news(self, soup):
|
||||||
news_grid = soup.find("ul", attrs={"class": "default"})
|
news_grid = soup.find('ul', attrs={'class': 'default'})
|
||||||
|
|
||||||
news = []
|
news = []
|
||||||
|
|
||||||
for item in news_grid.findAll("li"):
|
for item in news_grid.findAll('li'):
|
||||||
title = item.find("p", attrs={"class": "title"}).string
|
title = item.find('p', attrs={'class': 'title'}).string
|
||||||
date_string = item.find("p", attrs={"class": "date"}).next
|
date_string = item.find('p', attrs={'class': 'date'}).next
|
||||||
date = date_string.strip()
|
date = date_string.strip()
|
||||||
|
|
||||||
url = self.index + item.find("a")["href"]
|
url = self.index + item.find('a')['href']
|
||||||
|
|
||||||
news.append(
|
news.append(
|
||||||
{
|
{
|
||||||
"title": title,
|
'title': title,
|
||||||
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
|
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
|
||||||
"url": url,
|
'url': url,
|
||||||
"description": "",
|
'description': '',
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
return news
|
return news
|
||||||
|
|
||||||
def get_section(self, section):
|
def get_section(self, section):
|
||||||
soup = self.index_to_soup(self.index + "/ajw/" + section)
|
soup = self.index_to_soup(self.index + '/ajw/' + section)
|
||||||
|
|
||||||
section_news_items = self.get_top6(soup)
|
section_news_items = self.get_top6(soup)
|
||||||
section_news_items.extend(self.get_section_news(soup))
|
section_news_items.extend(self.get_section_news(soup))
|
||||||
@ -114,26 +114,26 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
|
|||||||
return section_news_items
|
return section_news_items
|
||||||
|
|
||||||
def get_special_section(self, section):
|
def get_special_section(self, section):
|
||||||
soup = self.index_to_soup(self.index + "/ajw/" + section)
|
soup = self.index_to_soup(self.index + '/ajw/' + section)
|
||||||
top = soup.find("div", attrs={"class": "Section"})
|
top = soup.find('div', attrs={'class': 'Section'})
|
||||||
|
|
||||||
special_news = []
|
special_news = []
|
||||||
|
|
||||||
for item in top.findAll("li"):
|
for item in top.findAll('li'):
|
||||||
item_a = item.find("a")
|
item_a = item.find('a')
|
||||||
|
|
||||||
text_split = item_a.text.strip().split("\n")
|
text_split = item_a.text.strip().split('\n')
|
||||||
title = text_split[0]
|
title = text_split[0]
|
||||||
description = text_split[1].strip()
|
description = text_split[1].strip()
|
||||||
|
|
||||||
url = self.index + item_a["href"]
|
url = self.index + item_a['href']
|
||||||
|
|
||||||
special_news.append(
|
special_news.append(
|
||||||
{
|
{
|
||||||
"title": title,
|
'title': title,
|
||||||
"date": "",
|
'date': '',
|
||||||
"url": url,
|
'url': url,
|
||||||
"description": description,
|
'description': description,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -144,24 +144,24 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
|
|||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
("What's New", self.get_whats_new()),
|
("What's New", self.get_whats_new()),
|
||||||
("National Report", self.get_section("national_report")),
|
('National Report', self.get_section('national_report')),
|
||||||
("Politics", self.get_section("politics")),
|
('Politics', self.get_section('politics')),
|
||||||
("Business", self.get_section("business")),
|
('Business', self.get_section('business')),
|
||||||
("Asia & World - China", self.get_section("asia_world/china")),
|
('Asia & World - China', self.get_section('asia_world/china')),
|
||||||
("Asia & World - Korean Peninsula", self.get_section("asia_world/korean_peninsula")),
|
('Asia & World - Korean Peninsula', self.get_section('asia_world/korean_peninsula')),
|
||||||
("Asia & World - Around Asia", self.get_section("asia_world/around_asia")),
|
('Asia & World - Around Asia', self.get_section('asia_world/around_asia')),
|
||||||
("Asia & World - World", self.get_section("asia_world/world")),
|
('Asia & World - World', self.get_section('asia_world/world')),
|
||||||
("Sci & Tech", self.get_section("sci_tech")),
|
('Sci & Tech', self.get_section('sci_tech')),
|
||||||
("Culture - Style", self.get_section("culture/style")),
|
('Culture - Style', self.get_section('culture/style')),
|
||||||
# ("Culture - Cooking", self.get_section("culture/cooking")),
|
# ("Culture - Cooking", self.get_section("culture/cooking")),
|
||||||
("Culture - Movies", self.get_section("culture/movies")),
|
('Culture - Movies', self.get_section('culture/movies')),
|
||||||
("Culture - Manga & Anime", self.get_section("culture/manga_anime")),
|
('Culture - Manga & Anime', self.get_section('culture/manga_anime')),
|
||||||
("Travel", self.get_section("travel")),
|
('Travel', self.get_section('travel')),
|
||||||
("Sports", self.get_section("sports")),
|
('Sports', self.get_section('sports')),
|
||||||
("Opinion - Editorial", self.get_section("opinion/editorial")),
|
('Opinion - Editorial', self.get_section('opinion/editorial')),
|
||||||
("Opinion - Vox Populi", self.get_section("opinion/vox")),
|
('Opinion - Vox Populi', self.get_section('opinion/vox')),
|
||||||
("Opinion - Views", self.get_section("opinion/views")),
|
('Opinion - Views', self.get_section('opinion/views')),
|
||||||
("Special", self.get_special_section("special")),
|
('Special', self.get_special_section('special')),
|
||||||
]
|
]
|
||||||
|
|
||||||
return feeds
|
return feeds
|
||||||
|
@ -14,7 +14,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class AsianReviewOfBooks(BasicNewsRecipe):
|
class AsianReviewOfBooks(BasicNewsRecipe):
|
||||||
title = 'The Asian Review of Books'
|
title = 'The Asian Review of Books'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.' # noqa
|
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.' # noqa: E501
|
||||||
publisher = 'The Asian Review of Books'
|
publisher = 'The Asian Review of Books'
|
||||||
category = 'literature, books, reviews, Asia'
|
category = 'literature, books, reviews, Asia'
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
@ -26,11 +26,11 @@ class AsianReviewOfBooks(BasicNewsRecipe):
|
|||||||
publication_type = 'magazine'
|
publication_type = 'magazine'
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
masthead_url = 'https://i2.wp.com/asianreviewofbooks.com/content/wp-content/uploads/2016/09/ARBwidelogo.png'
|
masthead_url = 'https://i2.wp.com/asianreviewofbooks.com/content/wp-content/uploads/2016/09/ARBwidelogo.png'
|
||||||
extra_css = """
|
extra_css = '''
|
||||||
body{font-family: "Droid Serif", serif}
|
body{font-family: "Droid Serif", serif}
|
||||||
.entry-title {font-family: "Playfair Display", serif}
|
.entry-title {font-family: "Playfair Display", serif}
|
||||||
img {display: block}
|
img {display: block}
|
||||||
"""
|
'''
|
||||||
|
|
||||||
recipe_specific_options = {
|
recipe_specific_options = {
|
||||||
'days': {
|
'days': {
|
||||||
|
@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class AstroNEWS(BasicNewsRecipe):
|
class AstroNEWS(BasicNewsRecipe):
|
||||||
title = u'AstroNEWS'
|
title = u'AstroNEWS'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' # noqa
|
description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' # noqa: E501
|
||||||
category = 'astronomy, science'
|
category = 'astronomy, science'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
|
@ -12,7 +12,7 @@ test_article = None
|
|||||||
# test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed'
|
# test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed'
|
||||||
|
|
||||||
|
|
||||||
# {{{ parse article JSON
|
# parse article JSON {{{
|
||||||
def process_image_block(lines, block):
|
def process_image_block(lines, block):
|
||||||
caption = block.get('captionText')
|
caption = block.get('captionText')
|
||||||
caption_lines = []
|
caption_lines = []
|
||||||
|
@ -12,7 +12,7 @@ test_article = None
|
|||||||
# test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed'
|
# test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed'
|
||||||
|
|
||||||
|
|
||||||
# {{{ parse article JSON
|
# parse article JSON {{{
|
||||||
def process_image_block(lines, block):
|
def process_image_block(lines, block):
|
||||||
caption = block.get('captionText')
|
caption = block.get('captionText')
|
||||||
caption_lines = []
|
caption_lines = []
|
||||||
|
@ -11,7 +11,7 @@ class AttacEspanaRecipe (BasicNewsRecipe):
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
|
__copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
|
||||||
title = u'attac.es'
|
title = u'attac.es'
|
||||||
description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' # noqa
|
description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' # noqa: E501
|
||||||
url = 'http://www.attac.es'
|
url = 'http://www.attac.es'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
tags = 'contrainformación, información alternativa'
|
tags = 'contrainformación, información alternativa'
|
||||||
|
@ -24,4 +24,3 @@ class WwwAvisen_dk(BasicNewsRecipe):
|
|||||||
feeds = [
|
feeds = [
|
||||||
('Nyheder fra Avisen.dk', 'http://www.avisen.dk/rss.aspx'),
|
('Nyheder fra Avisen.dk', 'http://www.avisen.dk/rss.aspx'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -24,12 +24,12 @@ class BuenosAiresHerald(BasicNewsRecipe):
|
|||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg'
|
masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg'
|
||||||
INDEX = 'http://www.buenosairesherald.com'
|
INDEX = 'http://www.buenosairesherald.com'
|
||||||
extra_css = """
|
extra_css = '''
|
||||||
body{font-family: Arial,Helvetica,sans-serif }
|
body{font-family: Arial,Helvetica,sans-serif }
|
||||||
img{margin-bottom: 0.4em; display:block}
|
img{margin-bottom: 0.4em; display:block}
|
||||||
h1{font-family: Georgia,serif}
|
h1{font-family: Georgia,serif}
|
||||||
#fecha{text-align: right; font-size: small}
|
#fecha{text-align: right; font-size: small}
|
||||||
"""
|
'''
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
|
@ -22,21 +22,21 @@ class AdvancedUserRecipe1718382046(BasicNewsRecipe):
|
|||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
#Gardening
|
# Gardening
|
||||||
('Gardening', 'https://www.backyardboss.net/feed/category/gardening/'),
|
('Gardening', 'https://www.backyardboss.net/feed/category/gardening/'),
|
||||||
('Outdoor Gardening', 'https://www.backyardboss.net/feed/category/gardening/outdoor-gardening/'),
|
('Outdoor Gardening', 'https://www.backyardboss.net/feed/category/gardening/outdoor-gardening/'),
|
||||||
('Indoor Gardening', 'https://www.backyardboss.net/feed/category/gardening/indoor-gardening/'),
|
('Indoor Gardening', 'https://www.backyardboss.net/feed/category/gardening/indoor-gardening/'),
|
||||||
('Fruits & Vegetables', 'https://www.backyardboss.net/feed/tag/gardening/fruits-and-vegetables/'),
|
('Fruits & Vegetables', 'https://www.backyardboss.net/feed/tag/gardening/fruits-and-vegetables/'),
|
||||||
('Houseplants', 'https://www.backyardboss.net/feed/category/gardening/houseplants/'),
|
('Houseplants', 'https://www.backyardboss.net/feed/category/gardening/houseplants/'),
|
||||||
('Plant Care', 'https://www.backyardboss.net/feed/category/gardening/plant-care/'),
|
('Plant Care', 'https://www.backyardboss.net/feed/category/gardening/plant-care/'),
|
||||||
#Backyard
|
# Backyard
|
||||||
('Backyard', 'https://www.backyardboss.net/feed/category/backyard/'),
|
('Backyard', 'https://www.backyardboss.net/feed/category/backyard/'),
|
||||||
('Home Improvement', 'https://www.backyardboss.net/feed/category/backyard/home-improvement/'),
|
('Home Improvement', 'https://www.backyardboss.net/feed/category/backyard/home-improvement/'),
|
||||||
('Lawn Care', 'https://www.backyardboss.net/feed/category/backyard/lawn-care/'),
|
('Lawn Care', 'https://www.backyardboss.net/feed/category/backyard/lawn-care/'),
|
||||||
('Landscaping', 'https://www.backyardboss.net/feed/category/backyard/landscape-industry/'),
|
('Landscaping', 'https://www.backyardboss.net/feed/category/backyard/landscape-industry/'),
|
||||||
('Barbecue', 'https://www.backyardboss.net/feed/category/backyard/bbq/'),
|
('Barbecue', 'https://www.backyardboss.net/feed/category/backyard/bbq/'),
|
||||||
('Reviews', 'https://www.backyardboss.net/feed/category/backyard/reviews/'),
|
('Reviews', 'https://www.backyardboss.net/feed/category/backyard/reviews/'),
|
||||||
#DIY & Project
|
# DIY & Project
|
||||||
('DIY & Projects', 'https://www.backyardboss.net/feed/category/diy/'),
|
('DIY & Projects', 'https://www.backyardboss.net/feed/category/diy/'),
|
||||||
('How-To', 'https://www.backyardboss.net/feed/category/diy/how-to/'),
|
('How-To', 'https://www.backyardboss.net/feed/category/diy/how-to/'),
|
||||||
('Designs & Ideas', 'https://www.backyardboss.net/feed/category/diy/designs-and-ideas/'),
|
('Designs & Ideas', 'https://www.backyardboss.net/feed/category/diy/designs-and-ideas/'),
|
||||||
|
@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class BaikalJournal(BasicNewsRecipe):
|
class BaikalJournal(BasicNewsRecipe):
|
||||||
title = '\u041B\u044E\u0434\u0438 \u0411\u0430\u0439\u043A\u0430\u043B\u0430'
|
title = '\u041B\u044E\u0434\u0438 \u0411\u0430\u0439\u043A\u0430\u043B\u0430'
|
||||||
__author__ = 'bugmen00t'
|
__author__ = 'bugmen00t'
|
||||||
description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u044B\u0439 \u0430\u0432\u0442\u043E\u0440\u0441\u043A\u0438\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0436\u0443\u0440\u043D\u0430\u043B \u043E \u0436\u0438\u0437\u043D\u0438 \u0432 \u0421\u0438\u0431\u0438\u0440\u0438 \u0438 \u0432\u043E\u043A\u0440\u0443\u0433 \u0411\u0430\u0439\u043A\u0430\u043B\u0430, \u043E\u043F\u0438\u0441\u044B\u0432\u0430\u044E\u0449\u0438\u0439 \u0436\u0438\u0437\u043D\u044C \u0432 \u0441\u0438\u0431\u0438\u0440\u0441\u043A\u043E\u0439 \u0433\u043B\u0443\u0431\u0438\u043D\u043A\u0435.' # noqa
|
description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u044B\u0439 \u0430\u0432\u0442\u043E\u0440\u0441\u043A\u0438\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0436\u0443\u0440\u043D\u0430\u043B \u043E \u0436\u0438\u0437\u043D\u0438 \u0432 \u0421\u0438\u0431\u0438\u0440\u0438 \u0438 \u0432\u043E\u043A\u0440\u0443\u0433 \u0411\u0430\u0439\u043A\u0430\u043B\u0430, \u043E\u043F\u0438\u0441\u044B\u0432\u0430\u044E\u0449\u0438\u0439 \u0436\u0438\u0437\u043D\u044C \u0432 \u0441\u0438\u0431\u0438\u0440\u0441\u043A\u043E\u0439 \u0433\u043B\u0443\u0431\u0438\u043D\u043A\u0435.' # noqa: E501
|
||||||
publisher = '\u041E\u043B\u044C\u0433\u0430 \u041C\u0443\u0442\u043E\u0432\u0438\u043D\u0430, \u0415\u043B\u0435\u043D\u0430 \u0422\u0440\u0438\u0444\u043E\u043D\u043E\u0432\u0430' # noqa
|
publisher = '\u041E\u043B\u044C\u0433\u0430 \u041C\u0443\u0442\u043E\u0432\u0438\u043D\u0430, \u0415\u043B\u0435\u043D\u0430 \u0422\u0440\u0438\u0444\u043E\u043D\u043E\u0432\u0430' # noqa: E501
|
||||||
category = 'blog'
|
category = 'blog'
|
||||||
cover_url = u'https://baikal-journal.ru/wp-content/themes/baikal/assets/img/logo-full.svg'
|
cover_url = u'https://baikal-journal.ru/wp-content/themes/baikal/assets/img/logo-full.svg'
|
||||||
language = 'ru'
|
language = 'ru'
|
||||||
|
@ -16,7 +16,7 @@ class BangkokPostRecipe(BasicNewsRecipe):
|
|||||||
title = u'Bangkok Post'
|
title = u'Bangkok Post'
|
||||||
publisher = u'Post Publishing PCL'
|
publisher = u'Post Publishing PCL'
|
||||||
category = u'News'
|
category = u'News'
|
||||||
description = u'The world\'s window to Thailand'
|
description = u"The world's window to Thailand"
|
||||||
|
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
@ -26,7 +26,7 @@ class bar(BasicNewsRecipe):
|
|||||||
prefixed_classes(
|
prefixed_classes(
|
||||||
'text-story-m_story-tags__ story-footer-module__metype__'
|
'text-story-m_story-tags__ story-footer-module__metype__'
|
||||||
),
|
),
|
||||||
dict(name = 'svg')
|
dict(name='svg')
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
@ -8,11 +8,11 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes
|
|||||||
|
|
||||||
|
|
||||||
class barrons(BasicNewsRecipe):
|
class barrons(BasicNewsRecipe):
|
||||||
title = 'Barron\'s Magazine'
|
title = "Barron's Magazine"
|
||||||
__author__ = 'unkn0wn'
|
__author__ = 'unkn0wn'
|
||||||
description = (
|
description = (
|
||||||
'Barron\'s is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister '
|
"Barron's is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister "
|
||||||
'publication to The Wall Street Journal, Barron\'s covers U.S. financial information, market developments, and '
|
"publication to The Wall Street Journal, Barron's covers U.S. financial information, market developments, and "
|
||||||
'relevant statistics.'
|
'relevant statistics.'
|
||||||
)
|
)
|
||||||
language = 'en_US'
|
language = 'en_US'
|
||||||
@ -82,7 +82,7 @@ class barrons(BasicNewsRecipe):
|
|||||||
recipe_specific_options = {
|
recipe_specific_options = {
|
||||||
'date': {
|
'date': {
|
||||||
'short': 'The date of the edition to download (YYYYMMDD format)',
|
'short': 'The date of the edition to download (YYYYMMDD format)',
|
||||||
'long': 'For example, 20240722.\nIf it didn\'t work, try again later.'
|
'long': "For example, 20240722.\nIf it didn't work, try again later."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -119,7 +119,7 @@ class barrons(BasicNewsRecipe):
|
|||||||
byl = articles.find(**prefixed_classes('BarronsTheme--byline--'))
|
byl = articles.find(**prefixed_classes('BarronsTheme--byline--'))
|
||||||
if byl:
|
if byl:
|
||||||
desc += self.tag_to_string(byl)
|
desc += self.tag_to_string(byl)
|
||||||
ttr = articles.find(**prefixed_classes('BarronsTheme--time-to-read--'))
|
ttr = articles.find(**prefixed_classes('BarronsTheme--time-to-read--'))
|
||||||
if ttr:
|
if ttr:
|
||||||
desc += self.tag_to_string(ttr)
|
desc += self.tag_to_string(ttr)
|
||||||
summ = articles.find(**prefixed_classes('BarronsTheme--summary--'))
|
summ = articles.find(**prefixed_classes('BarronsTheme--summary--'))
|
||||||
@ -127,7 +127,7 @@ class barrons(BasicNewsRecipe):
|
|||||||
desc += ' | ' + self.tag_to_string(summ)
|
desc += ' | ' + self.tag_to_string(summ)
|
||||||
self.log('\t', title, ' ', url, '\n\t', desc)
|
self.log('\t', title, ' ', url, '\n\t', desc)
|
||||||
ans[section].append({'title': title, 'url': url, 'description': desc})
|
ans[section].append({'title': title, 'url': url, 'description': desc})
|
||||||
return [(section, articles) for section, articles in ans.items()]
|
return list(ans.items())
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.split('?')[0].replace('/articles/', '/amp/articles/')
|
return url.split('?')[0].replace('/articles/', '/amp/articles/')
|
||||||
|
@ -135,9 +135,9 @@ class BBCNews(BasicNewsRecipe):
|
|||||||
|
|
||||||
# Select / de-select the feeds you want in your ebook.
|
# Select / de-select the feeds you want in your ebook.
|
||||||
feeds = [
|
feeds = [
|
||||||
("News Home", "https://feeds.bbci.co.uk/news/rss.xml"),
|
('News Home', 'https://feeds.bbci.co.uk/news/rss.xml'),
|
||||||
("UK", "https://feeds.bbci.co.uk/news/uk/rss.xml"),
|
('UK', 'https://feeds.bbci.co.uk/news/uk/rss.xml'),
|
||||||
("World", "https://feeds.bbci.co.uk/news/world/rss.xml"),
|
('World', 'https://feeds.bbci.co.uk/news/world/rss.xml'),
|
||||||
# ("England", "https://feeds.bbci.co.uk/news/england/rss.xml"),
|
# ("England", "https://feeds.bbci.co.uk/news/england/rss.xml"),
|
||||||
# ("Scotland", "https://feeds.bbci.co.uk/news/scotland/rss.xml"),
|
# ("Scotland", "https://feeds.bbci.co.uk/news/scotland/rss.xml"),
|
||||||
# ("Wales", "https://feeds.bbci.co.uk/news/wales/rss.xml"),
|
# ("Wales", "https://feeds.bbci.co.uk/news/wales/rss.xml"),
|
||||||
@ -147,26 +147,26 @@ class BBCNews(BasicNewsRecipe):
|
|||||||
# ("Europe", "https://feeds.bbci.co.uk/news/world/europe/rss.xml"),
|
# ("Europe", "https://feeds.bbci.co.uk/news/world/europe/rss.xml"),
|
||||||
# ("Latin America", "https://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
|
# ("Latin America", "https://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
|
||||||
# ("Middle East", "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
|
# ("Middle East", "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
|
||||||
("US & Canada", "https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"),
|
('US & Canada', 'https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml'),
|
||||||
("Politics", "https://feeds.bbci.co.uk/news/politics/rss.xml"),
|
('Politics', 'https://feeds.bbci.co.uk/news/politics/rss.xml'),
|
||||||
("Science/Environment",
|
('Science/Environment',
|
||||||
"https://feeds.bbci.co.uk/news/science_and_environment/rss.xml"),
|
'https://feeds.bbci.co.uk/news/science_and_environment/rss.xml'),
|
||||||
("Technology", "https://feeds.bbci.co.uk/news/technology/rss.xml"),
|
('Technology', 'https://feeds.bbci.co.uk/news/technology/rss.xml'),
|
||||||
("Magazine", "https://feeds.bbci.co.uk/news/magazine/rss.xml"),
|
('Magazine', 'https://feeds.bbci.co.uk/news/magazine/rss.xml'),
|
||||||
("Entertainment/Arts",
|
('Entertainment/Arts',
|
||||||
"https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"),
|
'https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml'),
|
||||||
# ("Health", "https://feeds.bbci.co.uk/news/health/rss.xml"),
|
# ("Health", "https://feeds.bbci.co.uk/news/health/rss.xml"),
|
||||||
# ("Education/Family", "https://feeds.bbci.co.uk/news/education/rss.xml"),
|
# ("Education/Family", "https://feeds.bbci.co.uk/news/education/rss.xml"),
|
||||||
("Business", "https://feeds.bbci.co.uk/news/business/rss.xml"),
|
('Business', 'https://feeds.bbci.co.uk/news/business/rss.xml'),
|
||||||
("Special Reports", "https://feeds.bbci.co.uk/news/special_reports/rss.xml"),
|
('Special Reports', 'https://feeds.bbci.co.uk/news/special_reports/rss.xml'),
|
||||||
("Also in the News", "https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"),
|
('Also in the News', 'https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml'),
|
||||||
# ("Newsbeat", "https://www.bbc.co.uk/newsbeat/rss.xml"),
|
# ("Newsbeat", "https://www.bbc.co.uk/newsbeat/rss.xml"),
|
||||||
# ("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
|
# ("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
|
||||||
# ("Blog: Mark D'Arcy (Parliamentary Correspondent)", "https://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
|
# ("Blog: Mark D'Arcy (Parliamentary Correspondent)", "https://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
|
||||||
# ("Blog: Robert Peston (Business Editor)", "https://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
|
# ("Blog: Robert Peston (Business Editor)", "https://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
|
||||||
# ("Blog: Stephanie Flanders (Economics Editor)", "https://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
|
# ("Blog: Stephanie Flanders (Economics Editor)", "https://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
|
||||||
("Sport Front Page",
|
('Sport Front Page',
|
||||||
"http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"),
|
'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml'),
|
||||||
# ("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
|
# ("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
|
||||||
# ("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
|
# ("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
|
||||||
# ("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),
|
# ("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),
|
||||||
|
@ -201,24 +201,24 @@ class BBCBrasilRecipe(BasicNewsRecipe):
|
|||||||
conversion_options = {'smarten_punctuation': True}
|
conversion_options = {'smarten_punctuation': True}
|
||||||
|
|
||||||
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
||||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
extra_css = '''body { font-family: verdana, helvetica, sans-serif; }
|
||||||
.introduction, .first { font-weight: bold; } \
|
.introduction, .first { font-weight: bold; }
|
||||||
.cross-head { font-weight: bold; font-size: 125%; } \
|
.cross-head { font-weight: bold; font-size: 125%; }
|
||||||
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
|
.cap, .caption { display: block; font-size: 80%; font-style: italic; }
|
||||||
.cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \
|
.cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; }
|
||||||
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
|
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position,
|
||||||
.correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block; \
|
.correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block;
|
||||||
text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \
|
text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; }
|
||||||
.story-date, .published, .datestamp { font-size: 80%; } \
|
.story-date, .published, .datestamp { font-size: 80%; }
|
||||||
table { width: 100%; } \
|
table { width: 100%; }
|
||||||
td img { display: block; margin: 5px auto; } \
|
td img { display: block; margin: 5px auto; }
|
||||||
ul { padding-top: 10px; } \
|
ul { padding-top: 10px; }
|
||||||
ol { padding-top: 10px; } \
|
ol { padding-top: 10px; }
|
||||||
li { padding-top: 5px; padding-bottom: 5px; } \
|
li { padding-top: 5px; padding-bottom: 5px; }
|
||||||
h1 { text-align: center; font-size: 175%; font-weight: bold; } \
|
h1 { text-align: center; font-size: 175%; font-weight: bold; }
|
||||||
h2 { text-align: center; font-size: 150%; font-weight: bold; } \
|
h2 { text-align: center; font-size: 150%; font-weight: bold; }
|
||||||
h3 { text-align: center; font-size: 125%; font-weight: bold; } \
|
h3 { text-align: center; font-size: 125%; font-weight: bold; }
|
||||||
h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
|
h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'''
|
||||||
|
|
||||||
# Remove various tag attributes to improve the look of the ebook pages.
|
# Remove various tag attributes to improve the look of the ebook pages.
|
||||||
remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
||||||
@ -556,19 +556,19 @@ class BBCBrasilRecipe(BasicNewsRecipe):
|
|||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
|
|
||||||
# Handle sports page urls type 01:
|
# Handle sports page urls type 01:
|
||||||
if (url.find("go/rss/-/sport1/") != -1):
|
if (url.find('go/rss/-/sport1/') != -1):
|
||||||
temp_url = url.replace("go/rss/-/", "")
|
temp_url = url.replace('go/rss/-/', '')
|
||||||
|
|
||||||
# Handle sports page urls type 02:
|
# Handle sports page urls type 02:
|
||||||
elif (url.find("go/rss/int/news/-/sport1/") != -1):
|
elif (url.find('go/rss/int/news/-/sport1/') != -1):
|
||||||
temp_url = url.replace("go/rss/int/news/-/", "")
|
temp_url = url.replace('go/rss/int/news/-/', '')
|
||||||
|
|
||||||
# Handle regular news page urls:
|
# Handle regular news page urls:
|
||||||
else:
|
else:
|
||||||
temp_url = url.replace("go/rss/int/news/-/", "")
|
temp_url = url.replace('go/rss/int/news/-/', '')
|
||||||
|
|
||||||
# Always add "?print=true" to the end of the url.
|
# Always add "?print=true" to the end of the url.
|
||||||
print_url = temp_url + "?print=true"
|
print_url = temp_url + '?print=true'
|
||||||
|
|
||||||
return print_url
|
return print_url
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
|
|||||||
remove_tags_after = dict(name='div', attrs={'class': [' g-w8']})
|
remove_tags_after = dict(name='div', attrs={'class': [' g-w8']})
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='ul', attrs={'class': ['document-tools blq-clearfix', 'blq-clearfix']}), dict(name='div', attrs={'class': ['box bx-quote-bubble', 'socialmedia-links', 'list li-carousel', 'list li-plain rolling-news', 'list li-plain', 'box bx-livestats', 'li-tab content', 'list li-relatedlinks', 'list li-relatedinternetlinks']}) # noqa
|
dict(name='ul', attrs={'class': ['document-tools blq-clearfix', 'blq-clearfix']}), dict(name='div', attrs={'class': ['box bx-quote-bubble', 'socialmedia-links', 'list li-carousel', 'list li-plain rolling-news', 'list li-plain', 'box bx-livestats', 'li-tab content', 'list li-relatedlinks', 'list li-relatedinternetlinks']}) # noqa: E501
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
@ -134,7 +134,7 @@ if __name__ == '__main__':
|
|||||||
class BBC(BasicNewsRecipe):
|
class BBC(BasicNewsRecipe):
|
||||||
title = 'BBC News (fast)'
|
title = 'BBC News (fast)'
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
description = 'Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.' # noqa
|
description = 'Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.' # noqa: E501
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
@ -21,7 +21,7 @@ class BBC(BasicNewsRecipe):
|
|||||||
category = 'sport, news, UK, world'
|
category = 'sport, news, UK, world'
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa
|
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa: E501
|
||||||
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
|
'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
@ -9,10 +9,10 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class BeforeWeGo(BasicNewsRecipe):
|
class BeforeWeGo(BasicNewsRecipe):
|
||||||
title = 'Before We Go'
|
title = 'Before We Go'
|
||||||
__author__ = 'bugmen00t'
|
__author__ = 'bugmen00t'
|
||||||
description = 'Before We Go Blog is a collective of Fantasy, Sci-Fi and Graphic Novel fans from around the world, passionate about providing awesome, enjoyable reviews for anyone who loves a good book!' # noqa
|
description = 'Before We Go Blog is a collective of Fantasy, Sci-Fi and Graphic Novel fans from around the world, passionate about providing awesome, enjoyable reviews for anyone who loves a good book!' # noqa: E501
|
||||||
publisher = 'BEFOREWEGOBLOG'
|
publisher = 'BEFOREWEGOBLOG'
|
||||||
category = 'blog'
|
category = 'blog'
|
||||||
# cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/09/beth-with-long-hair.jpeg'
|
# cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/09/beth-with-long-hair.jpeg'
|
||||||
cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/01/before-we-go-blog-1.png'
|
cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/01/before-we-go-blog-1.png'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -24,9 +24,9 @@ class BeforeWeGo(BasicNewsRecipe):
|
|||||||
remove_tags_before = dict(name='h1', attrs={'class': 'entry-title'})
|
remove_tags_before = dict(name='h1', attrs={'class': 'entry-title'})
|
||||||
|
|
||||||
remove_tags_after = dict(name='div', attrs={'id': 'author-bio'})
|
remove_tags_after = dict(name='div', attrs={'id': 'author-bio'})
|
||||||
# remove_tags_after = dict(name='article')
|
# remove_tags_after = dict(name='article')
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class': 'nectar-scrolling-text font_size_10vh custom_color has-custom-divider'}),
|
dict(name='div', attrs={'class': 'nectar-scrolling-text font_size_10vh custom_color has-custom-divider'}),
|
||||||
dict(name='span', attrs={'class': 'meta-comment-count'}),
|
dict(name='span', attrs={'class': 'meta-comment-count'}),
|
||||||
dict(name='p', attrs={'id': 'breadcrumbs'})
|
dict(name='p', attrs={'id': 'breadcrumbs'})
|
||||||
|
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class Bellingcat(BasicNewsRecipe):
|
class Bellingcat(BasicNewsRecipe):
|
||||||
title = 'Bellingcat'
|
title = 'Bellingcat'
|
||||||
__author__ = 'bugmen00t'
|
__author__ = 'bugmen00t'
|
||||||
description = 'Bellingcat is an independent international collective of researchers, investigators and citizen journalists using open source and social media investigation to probe a variety of subjects – from Mexican drug lords and crimes against humanity, to tracking the use of chemical weapons and conflicts worldwide. With staff and contributors in more than 20 countries around the world, we operate in a unique field where advanced technology, forensic research, journalism, investigations, transparency and accountability come together.' # noqa
|
description = 'Bellingcat is an independent international collective of researchers, investigators and citizen journalists using open source and social media investigation to probe a variety of subjects – from Mexican drug lords and crimes against humanity, to tracking the use of chemical weapons and conflicts worldwide. With staff and contributors in more than 20 countries around the world, we operate in a unique field where advanced technology, forensic research, journalism, investigations, transparency and accountability come together.' # noqa: E501
|
||||||
publisher = 'Stichting Bellingcat'
|
publisher = 'Stichting Bellingcat'
|
||||||
category = 'blog'
|
category = 'blog'
|
||||||
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
||||||
|
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class Bellingcat(BasicNewsRecipe):
|
class Bellingcat(BasicNewsRecipe):
|
||||||
title = 'Bellingcat'
|
title = 'Bellingcat'
|
||||||
__author__ = 'bugmen00t'
|
__author__ = 'bugmen00t'
|
||||||
description = 'Bellingcat es un colectivo internacional independiente de investigadores y periodistas ciudadanos que usan informaci\u00F3n de fuente abierta y redes sociales para investigar una gran variedad de temas, desde carteles de droga en M\u00E9xico y cr\u00EDmenes de lesa humanidad hasta el rastreo de armas qu\u00EDmicas en zonas de conflicto alrededor del mundo. Nosotros operamos en un campo \u00FAnico donde la tecnolog\u00EDa avanzada, las investigaciones forenses, el periodismo, y la transparencia y responsabilidad se unen.' # noqa
|
description = 'Bellingcat es un colectivo internacional independiente de investigadores y periodistas ciudadanos que usan informaci\u00F3n de fuente abierta y redes sociales para investigar una gran variedad de temas, desde carteles de droga en M\u00E9xico y cr\u00EDmenes de lesa humanidad hasta el rastreo de armas qu\u00EDmicas en zonas de conflicto alrededor del mundo. Nosotros operamos en un campo \u00FAnico donde la tecnolog\u00EDa avanzada, las investigaciones forenses, el periodismo, y la transparencia y responsabilidad se unen.' # noqa: E501
|
||||||
publisher = 'Stichting Bellingcat'
|
publisher = 'Stichting Bellingcat'
|
||||||
category = 'blog'
|
category = 'blog'
|
||||||
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
||||||
|
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class Bellingcat(BasicNewsRecipe):
|
class Bellingcat(BasicNewsRecipe):
|
||||||
title = 'Bellingcat'
|
title = 'Bellingcat'
|
||||||
__author__ = 'bugmen00t'
|
__author__ = 'bugmen00t'
|
||||||
description = 'Bellingcat est un groupe international ind\u00E9pendant de chercheurs, d\u0027enqu\u00EAteurs et de journalistes citoyens utilisant \u00E0 la fois: enqu\u00EAtes \u0027open source\u0027 et r\u00E9seaux sociaux, pour sonder une vari\u00E9t\u00E9 de sujets - trafiquants de drogue mexicains, crimes contre l\u0027humanit\u00E9, suivi de l\u0027utilisation d\u0027armes chimiques et conflits dans le monde entier. Nous op\u00E9rons dans un domaine unique dans lequel technologie de pointe, recherche m\u00E9dico-l\u00E9gale, journalisme, enqu\u00EAtes, transparence et responsabilit\u00E9 se rencontrent.' # noqa
|
description = 'Bellingcat est un groupe international ind\u00E9pendant de chercheurs, d\u0027enqu\u00EAteurs et de journalistes citoyens utilisant \u00E0 la fois: enqu\u00EAtes \u0027open source\u0027 et r\u00E9seaux sociaux, pour sonder une vari\u00E9t\u00E9 de sujets - trafiquants de drogue mexicains, crimes contre l\u0027humanit\u00E9, suivi de l\u0027utilisation d\u0027armes chimiques et conflits dans le monde entier. Nous op\u00E9rons dans un domaine unique dans lequel technologie de pointe, recherche m\u00E9dico-l\u00E9gale, journalisme, enqu\u00EAtes, transparence et responsabilit\u00E9 se rencontrent.' # noqa: E501
|
||||||
publisher = 'Stichting Bellingcat'
|
publisher = 'Stichting Bellingcat'
|
||||||
category = 'blog'
|
category = 'blog'
|
||||||
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
||||||
|
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class Bellingcat(BasicNewsRecipe):
|
class Bellingcat(BasicNewsRecipe):
|
||||||
title = '\u0411\u0435\u043B\u043B\u0438\u043D\u0433\u043A\u044D\u0442 (bell\u00BFngcat)'
|
title = '\u0411\u0435\u043B\u043B\u0438\u043D\u0433\u043A\u044D\u0442 (bell\u00BFngcat)'
|
||||||
__author__ = 'bugmen00t'
|
__author__ = 'bugmen00t'
|
||||||
description = 'Bellingcat \u2014 \u044D\u0442\u043E \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u0430\u044F \u043C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0430\u044F \u043A\u043E\u043C\u0430\u043D\u0434\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u0435\u0439 \u0438 \u0433\u0440\u0430\u0436\u0434\u0430\u043D\u0441\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u044E\u0442 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u0435 \u0438\u0441\u0442\u043E\u0447\u043D\u0438\u043A\u0438 \u0438 \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0441\u0435\u0442\u0438 \u0434\u043B\u044F \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0439 \u043D\u0430 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u2014 \u043E\u0442 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u043A\u0430\u0440\u0442\u0435\u043B\u0435\u0439 \u0438 \u043F\u0440\u0435\u0441\u0442\u0443\u043F\u043B\u0435\u043D\u0438\u0439 \u043F\u0440\u043E\u0442\u0438\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0430 \u0434\u043E \u043E\u0442\u0441\u043B\u0435\u0436\u0438\u0432\u0430\u043D\u0438\u044F \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u044F \u0445\u0438\u043C\u0438\u0447\u0435\u0441\u043A\u043E\u0433\u043E \u043E\u0440\u0443\u0436\u0438\u044F \u0438 \u043A\u043E\u043D\u0444\u043B\u0438\u043A\u0442\u043E\u0432 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u2014 \u0441\u043E\u0447\u0435\u0442\u0430\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0435 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438, \u043A\u0440\u0438\u043C\u0438\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u0438\u0437\u044B, \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0443, \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F \u0438 \u0431\u043E\u0440\u044C\u0431\u0443 \u0437\u0430 \u043F\u0440\u043E\u0437\u0440\u0430\u0447\u043D\u043E\u0441\u0442\u044C \u0438 \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0441\u0442\u044C.' # noqa
|
description = 'Bellingcat \u2014 \u044D\u0442\u043E \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u0430\u044F \u043C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0430\u044F \u043A\u043E\u043C\u0430\u043D\u0434\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u0435\u0439 \u0438 \u0433\u0440\u0430\u0436\u0434\u0430\u043D\u0441\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u044E\u0442 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u0435 \u0438\u0441\u0442\u043E\u0447\u043D\u0438\u043A\u0438 \u0438 \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0441\u0435\u0442\u0438 \u0434\u043B\u044F \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0439 \u043D\u0430 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u2014 \u043E\u0442 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u043A\u0430\u0440\u0442\u0435\u043B\u0435\u0439 \u0438 \u043F\u0440\u0435\u0441\u0442\u0443\u043F\u043B\u0435\u043D\u0438\u0439 \u043F\u0440\u043E\u0442\u0438\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0430 \u0434\u043E \u043E\u0442\u0441\u043B\u0435\u0436\u0438\u0432\u0430\u043D\u0438\u044F \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u044F \u0445\u0438\u043C\u0438\u0447\u0435\u0441\u043A\u043E\u0433\u043E \u043E\u0440\u0443\u0436\u0438\u044F \u0438 \u043A\u043E\u043D\u0444\u043B\u0438\u043A\u0442\u043E\u0432 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u2014 \u0441\u043E\u0447\u0435\u0442\u0430\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0435 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438, \u043A\u0440\u0438\u043C\u0438\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u0438\u0437\u044B, \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0443, \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F \u0438 \u0431\u043E\u0440\u044C\u0431\u0443 \u0437\u0430 \u043F\u0440\u043E\u0437\u0440\u0430\u0447\u043D\u043E\u0441\u0442\u044C \u0438 \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0441\u0442\u044C.' # noqa: E501
|
||||||
publisher = 'Stichting Bellingcat'
|
publisher = 'Stichting Bellingcat'
|
||||||
category = 'blog'
|
category = 'blog'
|
||||||
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
||||||
|
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class Bellingcat(BasicNewsRecipe):
|
class Bellingcat(BasicNewsRecipe):
|
||||||
title = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 (bell\u00BFngcat)'
|
title = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 (bell\u00BFngcat)'
|
||||||
__author__ = 'bugmen00t'
|
__author__ = 'bugmen00t'
|
||||||
description = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 \u2014 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0438\u0439 \u043C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0438\u0439 \u043A\u043E\u043B\u0435\u043A\u0442\u0438\u0432 \u0434\u043E\u0441\u043B\u0456\u0434\u043D\u0438\u043A\u0456\u0432, \u0441\u043B\u0456\u0434\u0447\u0438\u0445 \u0456 \u0433\u0440\u043E\u043C\u0430\u0434\u044F\u043D\u0441\u044C\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0456\u0432, \u044F\u043A\u0456 \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u043E\u0432\u0443\u044E\u0442\u044C \u0432\u0456\u0434\u043A\u0440\u0438\u0442\u0456 \u0434\u0436\u0435\u0440\u0435\u043B\u0430 \u0442\u0430 \u0441\u043E\u0446\u0456\u0430\u043B\u044C\u043D\u0456 \u043C\u0435\u0440\u0435\u0436\u0456 \u0434\u043B\u044F \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F \u0440\u0456\u0437\u043D\u043E\u043C\u0430\u043D\u0456\u0442\u043D\u0438\u0445 \u0442\u0435\u043C \u2014 \u0432\u0456\u0434 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u044C\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u0431\u0430\u0440\u043E\u043D\u0456\u0432 \u0456 \u0437\u043B\u043E\u0447\u0438\u043D\u0456\u0432 \u043F\u0440\u043E\u0442\u0438 \u043B\u044E\u0434\u0441\u0442\u0432\u0430, \u0434\u043E \u0432\u0456\u0434\u0441\u0442\u0435\u0436\u0435\u043D\u043D\u044F \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u0430\u043D\u043D\u044F \u0445\u0456\u043C\u0456\u0447\u043D\u043E\u0457 \u0437\u0431\u0440\u043E\u0457 \u0442\u0430 \u043A\u043E\u043D\u0444\u043B\u0456\u043A\u0442\u0456\u0432 \u0443 \u0432\u0441\u044C\u043E\u043C\u0443 \u0441\u0432\u0456\u0442\u0456. \u041C\u0438 \u043F\u0440\u0430\u0446\u044E\u0454\u043C\u043E \u0432 \u0443\u043D\u0456\u043A\u0430\u043B\u044C\u043D\u0456\u0439 \u0441\u0444\u0435\u0440\u0456, \u0434\u0435 \u043F\u0435\u0440\u0435\u0434\u043E\u0432\u0456 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0441\u0443\u0434\u043E\u0432\u043E-\u043C\u0435\u0434\u0438\u0447\u043D\u0456 \u0434\u043E\u0441\u043B\u0456\u0434\u0436\u0435\u043D\u043D\u044F, \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0438\u043A\u0430, \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F, \u043F\u0440\u043E\u0437\u043E\u0440\u0456\u0441\u0442\u044C \u0456 \u0432\u0456\u0434\u043F\u043E\u0432\u0456\u0434\u0430\u043B\u044C\u043D\u0456\u0441\u0442\u044C \u043E\u0431\u2019\u0454\u0434\u043D\u0443\u044E\u0442\u044C\u0441\u044F.' # noqa
|
description = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 \u2014 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0438\u0439 \u043C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0438\u0439 \u043A\u043E\u043B\u0435\u043A\u0442\u0438\u0432 \u0434\u043E\u0441\u043B\u0456\u0434\u043D\u0438\u043A\u0456\u0432, \u0441\u043B\u0456\u0434\u0447\u0438\u0445 \u0456 \u0433\u0440\u043E\u043C\u0430\u0434\u044F\u043D\u0441\u044C\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0456\u0432, \u044F\u043A\u0456 \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u043E\u0432\u0443\u044E\u0442\u044C \u0432\u0456\u0434\u043A\u0440\u0438\u0442\u0456 \u0434\u0436\u0435\u0440\u0435\u043B\u0430 \u0442\u0430 \u0441\u043E\u0446\u0456\u0430\u043B\u044C\u043D\u0456 \u043C\u0435\u0440\u0435\u0436\u0456 \u0434\u043B\u044F \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F \u0440\u0456\u0437\u043D\u043E\u043C\u0430\u043D\u0456\u0442\u043D\u0438\u0445 \u0442\u0435\u043C \u2014 \u0432\u0456\u0434 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u044C\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u0431\u0430\u0440\u043E\u043D\u0456\u0432 \u0456 \u0437\u043B\u043E\u0447\u0438\u043D\u0456\u0432 \u043F\u0440\u043E\u0442\u0438 \u043B\u044E\u0434\u0441\u0442\u0432\u0430, \u0434\u043E \u0432\u0456\u0434\u0441\u0442\u0435\u0436\u0435\u043D\u043D\u044F \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u0430\u043D\u043D\u044F \u0445\u0456\u043C\u0456\u0447\u043D\u043E\u0457 \u0437\u0431\u0440\u043E\u0457 \u0442\u0430 \u043A\u043E\u043D\u0444\u043B\u0456\u043A\u0442\u0456\u0432 \u0443 \u0432\u0441\u044C\u043E\u043C\u0443 \u0441\u0432\u0456\u0442\u0456. \u041C\u0438 \u043F\u0440\u0430\u0446\u044E\u0454\u043C\u043E \u0432 \u0443\u043D\u0456\u043A\u0430\u043B\u044C\u043D\u0456\u0439 \u0441\u0444\u0435\u0440\u0456, \u0434\u0435 \u043F\u0435\u0440\u0435\u0434\u043E\u0432\u0456 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0441\u0443\u0434\u043E\u0432\u043E-\u043C\u0435\u0434\u0438\u0447\u043D\u0456 \u0434\u043E\u0441\u043B\u0456\u0434\u0436\u0435\u043D\u043D\u044F, \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0438\u043A\u0430, \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F, \u043F\u0440\u043E\u0437\u043E\u0440\u0456\u0441\u0442\u044C \u0456 \u0432\u0456\u0434\u043F\u043E\u0432\u0456\u0434\u0430\u043B\u044C\u043D\u0456\u0441\u0442\u044C \u043E\u0431\u2019\u0454\u0434\u043D\u0443\u044E\u0442\u044C\u0441\u044F.' # noqa: E501
|
||||||
publisher = 'Stichting Bellingcat'
|
publisher = 'Stichting Bellingcat'
|
||||||
category = 'blog'
|
category = 'blog'
|
||||||
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
||||||
|
@ -17,13 +17,13 @@ class BenchmarkPl(BasicNewsRecipe):
|
|||||||
extra_css = 'ul {list-style-type: none;}'
|
extra_css = 'ul {list-style-type: none;}'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
preprocess_regexps = [(re.compile(u'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', # noqa
|
preprocess_regexps = [(re.compile(u'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', # noqa: E501, RUF039
|
||||||
re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(u'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa
|
re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(u'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa: RUF039
|
||||||
|
|
||||||
keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(
|
keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(
|
||||||
name='div', attrs={'class': ['m_zwykly', 'gallery']}), dict(id='article')]
|
name='div', attrs={'class': ['m_zwykly', 'gallery']}), dict(id='article')]
|
||||||
remove_tags_after = dict(id='article')
|
remove_tags_after = dict(id='article')
|
||||||
remove_tags = [dict(name='div', attrs={'class': ['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs={ 'background': 'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width': '210', 'cellspacing': '1', 'cellpadding': '4', 'border': '0', 'align': 'right'})] # noqa
|
remove_tags = [dict(name='div', attrs={'class': ['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs={'background': 'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width': '210', 'cellspacing': '1', 'cellpadding': '4', 'border': '0', 'align': 'right'})] # noqa: E501
|
||||||
|
|
||||||
INDEX = 'http://www.benchmark.pl'
|
INDEX = 'http://www.benchmark.pl'
|
||||||
feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
|
feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
|
||||||
|
@ -25,7 +25,6 @@ class bergfreunde_blog(BasicNewsRecipe):
|
|||||||
__author__ = 'VoHe'
|
__author__ = 'VoHe'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_javascript = True
|
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
|
||||||
|
@ -63,12 +63,12 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
|||||||
dict(
|
dict(
|
||||||
attrs={'class': ['socialbar', 'social-sharing flank', 'vel', 'back']}),
|
attrs={'class': ['socialbar', 'social-sharing flank', 'vel', 'back']}),
|
||||||
dict(name='img', attrs={'alt': 'logo'}),
|
dict(name='img', attrs={'alt': 'logo'}),
|
||||||
dict(name='div', attrs={'class': re.compile('infoEl')}),
|
dict(name='div', attrs={'class': re.compile(r'infoEl')}),
|
||||||
dict(name='span', attrs={'class': re.compile('loupe')})
|
dict(name='span', attrs={'class': re.compile(r'loupe')})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = [
|
remove_tags_after = [
|
||||||
dict(name='div', attrs={'itemprop': re.compile('articleBody')})
|
dict(name='div', attrs={'itemprop': re.compile(r'articleBody')})
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
@ -30,7 +30,7 @@ class BillOReilly(BasicNewsRecipe):
|
|||||||
feeds.append(("O'Reilly Factor", articles_shows))
|
feeds.append(("O'Reilly Factor", articles_shows))
|
||||||
|
|
||||||
if articles_columns:
|
if articles_columns:
|
||||||
feeds.append(("Newspaper Column", articles_columns))
|
feeds.append(('Newspaper Column', articles_columns))
|
||||||
|
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
@ -49,8 +49,7 @@ class BillOReilly(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://www.billoreilly.com' + url + \
|
url = 'http://www.billoreilly.com' + url + '&dest=/pg/jsp/community/tvshowprint.jsp'
|
||||||
'&dest=/pg/jsp/community/tvshowprint.jsp'
|
|
||||||
|
|
||||||
self.log('\t\tFound article:', title)
|
self.log('\t\tFound article:', title)
|
||||||
self.log('\t\t\t', url)
|
self.log('\t\t\t', url)
|
||||||
|
@ -57,8 +57,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
soup = self.index_to_soup('http://www.birminghammail.co.uk')
|
soup = self.index_to_soup('http://www.birminghammail.co.uk')
|
||||||
cov = soup.find(attrs={'src': re.compile(
|
cov = soup.find(attrs={'src': re.compile(r'http://images.icnetwork.co.uk/upl/birm')})
|
||||||
'http://images.icnetwork.co.uk/upl/birm')})
|
|
||||||
cov = str(cov)
|
cov = str(cov)
|
||||||
cov2 = re.findall(
|
cov2 = re.findall(
|
||||||
r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
|
r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
|
||||||
|
@ -27,15 +27,15 @@ class bleskRecipe(BasicNewsRecipe):
|
|||||||
cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
|
cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
extra_css = """
|
extra_css = '''
|
||||||
"""
|
'''
|
||||||
|
|
||||||
remove_attributes = []
|
remove_attributes = []
|
||||||
remove_tags_before = dict(name='div', attrs={'id': ['boxContent']})
|
remove_tags_before = dict(name='div', attrs={'id': ['boxContent']})
|
||||||
remove_tags_after = dict(name='div', attrs={'class': ['artAuthors']})
|
remove_tags_after = dict(name='div', attrs={'class': ['artAuthors']})
|
||||||
remove_tags = [dict(name='div', attrs={'class': ['link_clanek']}),
|
remove_tags = [dict(name='div', attrs={'class': ['link_clanek']}),
|
||||||
dict(name='div', attrs={'id': ['partHeader']}),
|
dict(name='div', attrs={'id': ['partHeader']}),
|
||||||
dict(name='div', attrs={'id': ['top_bottom_box', 'lista_top']})]
|
dict(name='div', attrs={'id': ['top_bottom_box', 'lista_top']})]
|
||||||
preprocess_regexps = [(re.compile(r'<div class="(textovytip|related)".*',
|
preprocess_regexps = [(re.compile(r'<div class="(textovytip|related)".*',
|
||||||
re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
|
re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class Blic(BasicNewsRecipe):
|
class Blic(BasicNewsRecipe):
|
||||||
title = 'Blic'
|
title = 'Blic'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' # noqa
|
description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' # noqa: E501
|
||||||
publisher = 'RINGIER d.o.o.'
|
publisher = 'RINGIER d.o.o.'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
@ -23,7 +23,7 @@ class Blic(BasicNewsRecipe):
|
|||||||
masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png'
|
masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png'
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
extra_css = """
|
extra_css = '''
|
||||||
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
|
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
|
||||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||||
body{font-family: Georgia, serif1, serif}
|
body{font-family: Georgia, serif1, serif}
|
||||||
@ -35,13 +35,13 @@ class Blic(BasicNewsRecipe):
|
|||||||
.potpis{font-size: x-small; color: gray}
|
.potpis{font-size: x-small; color: gray}
|
||||||
.article_info{font-size: small}
|
.article_info{font-size: small}
|
||||||
img{margin-bottom: 0.8em; margin-top: 0.8em; display: block}
|
img{margin-bottom: 0.8em; margin-top: 0.8em; display: block}
|
||||||
"""
|
'''
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] # noqa: RUF039
|
||||||
remove_tags_before = dict(name='div', attrs={'id': 'article_info'})
|
remove_tags_before = dict(name='div', attrs={'id': 'article_info'})
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object', 'link', 'meta', 'base', 'object', 'embed'])]
|
dict(name=['object', 'link', 'meta', 'base', 'object', 'embed'])]
|
||||||
|
@ -56,7 +56,7 @@ class Bloomberg(BasicNewsRecipe):
|
|||||||
masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg'
|
masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg'
|
||||||
description = (
|
description = (
|
||||||
'Bloomberg Businessweek helps global leaders stay ahead with insights and in-depth analysis on the people,'
|
'Bloomberg Businessweek helps global leaders stay ahead with insights and in-depth analysis on the people,'
|
||||||
' companies, events, and trends shaping today\'s complex, global economy.'
|
" companies, events, and trends shaping today's complex, global economy."
|
||||||
)
|
)
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
|
||||||
@ -124,8 +124,8 @@ class Bloomberg(BasicNewsRecipe):
|
|||||||
cat = '<div class="cat">' + data['primaryCategory'] + '</div>'
|
cat = '<div class="cat">' + data['primaryCategory'] + '</div>'
|
||||||
|
|
||||||
if 'abstract' in data and data['abstract'] and data['abstract'] is not None:
|
if 'abstract' in data and data['abstract'] and data['abstract'] is not None:
|
||||||
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join([x for x in data['abstract']]) + '</li></ul></div>'
|
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join(list(data['abstract'])) + '</li></ul></div>'
|
||||||
elif 'summary' in data and data['summary']:
|
elif data.get('summary'):
|
||||||
subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>'
|
subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>'
|
||||||
|
|
||||||
if 'byline' in data and data['byline'] is not None:
|
if 'byline' in data and data['byline'] is not None:
|
||||||
|
@ -58,7 +58,7 @@ class Bloomberg(BasicNewsRecipe):
|
|||||||
'Bloomberg delivers business and markets news, data, analysis, and video'
|
'Bloomberg delivers business and markets news, data, analysis, and video'
|
||||||
' to the world, featuring stories from Businessweek and Bloomberg News.'
|
' to the world, featuring stories from Businessweek and Bloomberg News.'
|
||||||
)
|
)
|
||||||
oldest_article = 1.2 # days
|
oldest_article = 1.2 # days
|
||||||
resolve_internal_links = True
|
resolve_internal_links = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
cover_url = 'https://assets.bwbx.io/images/users/iqjWHBFdfxIU/ivUxvlPidC3M/v0/600x-1.jpg'
|
cover_url = 'https://assets.bwbx.io/images/users/iqjWHBFdfxIU/ivUxvlPidC3M/v0/600x-1.jpg'
|
||||||
@ -134,8 +134,8 @@ class Bloomberg(BasicNewsRecipe):
|
|||||||
cat = '<div class="cat">' + data['primaryCategory'] + '</div>'
|
cat = '<div class="cat">' + data['primaryCategory'] + '</div>'
|
||||||
|
|
||||||
if 'abstract' in data and data['abstract'] and data['abstract'] is not None:
|
if 'abstract' in data and data['abstract'] and data['abstract'] is not None:
|
||||||
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join([x for x in data['abstract']]) + '</li></ul></div>'
|
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join(list(data['abstract'])) + '</li></ul></div>'
|
||||||
elif 'summary' in data and data['summary']:
|
elif data.get('summary'):
|
||||||
subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>'
|
subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>'
|
||||||
|
|
||||||
if 'byline' in data and data['byline'] is not None:
|
if 'byline' in data and data['byline'] is not None:
|
||||||
|
@ -2,29 +2,29 @@ from urllib.parse import urljoin
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
_issue_url = ""
|
_issue_url = ''
|
||||||
|
|
||||||
|
|
||||||
class BookforumMagazine(BasicNewsRecipe):
|
class BookforumMagazine(BasicNewsRecipe):
|
||||||
title = "Bookforum"
|
title = 'Bookforum'
|
||||||
description = (
|
description = (
|
||||||
"Bookforum is an American book review magazine devoted to books and "
|
'Bookforum is an American book review magazine devoted to books and '
|
||||||
"the discussion of literature. https://www.bookforum.com/print"
|
'the discussion of literature. https://www.bookforum.com/print'
|
||||||
)
|
)
|
||||||
language = "en"
|
language = 'en'
|
||||||
__author__ = "ping"
|
__author__ = 'ping'
|
||||||
publication_type = "magazine"
|
publication_type = 'magazine'
|
||||||
encoding = "utf-8"
|
encoding = 'utf-8'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
auto_cleanup = False
|
auto_cleanup = False
|
||||||
compress_news_images = True
|
compress_news_images = True
|
||||||
compress_news_images_auto_size = 8
|
compress_news_images_auto_size = 8
|
||||||
|
|
||||||
keep_only_tags = [dict(class_="blog-article")]
|
keep_only_tags = [dict(class_='blog-article')]
|
||||||
remove_tags = [dict(name=["af-share-toggle", "af-related-articles"])]
|
remove_tags = [dict(name=['af-share-toggle', 'af-related-articles'])]
|
||||||
|
|
||||||
extra_css = """
|
extra_css = '''
|
||||||
.blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; }
|
.blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; }
|
||||||
.blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
|
.blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
|
||||||
.blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; }
|
.blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; }
|
||||||
@ -33,46 +33,46 @@ class BookforumMagazine(BasicNewsRecipe):
|
|||||||
display: block; max-width: 100%; height: auto;
|
display: block; max-width: 100%; height: auto;
|
||||||
}
|
}
|
||||||
.blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; }
|
.blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; }
|
||||||
"""
|
'''
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
# strip away links that's not needed
|
# strip away links that's not needed
|
||||||
for ele in soup.select(".blog-article__header a"):
|
for ele in soup.select('.blog-article__header a'):
|
||||||
ele.unwrap()
|
ele.unwrap()
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup(
|
soup = self.index_to_soup(
|
||||||
_issue_url if _issue_url else "https://www.bookforum.com/print"
|
_issue_url if _issue_url else 'https://www.bookforum.com/print'
|
||||||
)
|
)
|
||||||
meta_ele = soup.find("meta", property="og:title")
|
meta_ele = soup.find('meta', property='og:title')
|
||||||
if meta_ele:
|
if meta_ele:
|
||||||
self.timefmt = f' [{meta_ele["content"]}]'
|
self.timefmt = f' [{meta_ele["content"]}]'
|
||||||
|
|
||||||
cover_ele = soup.find("img", class_="toc-issue__cover")
|
cover_ele = soup.find('img', class_='toc-issue__cover')
|
||||||
if cover_ele:
|
if cover_ele:
|
||||||
self.cover_url = urljoin(
|
self.cover_url = urljoin(
|
||||||
"https://www.bookforum.com",
|
'https://www.bookforum.com',
|
||||||
soup.find("img", class_="toc-issue__cover")["src"],
|
soup.find('img', class_='toc-issue__cover')['src'],
|
||||||
)
|
)
|
||||||
|
|
||||||
articles = {}
|
articles = {}
|
||||||
for sect_ele in soup.find_all("div", class_="toc-articles__section"):
|
for sect_ele in soup.find_all('div', class_='toc-articles__section'):
|
||||||
section_name = self.tag_to_string(
|
section_name = self.tag_to_string(
|
||||||
sect_ele.find("a", class_="toc__anchor-links__link")
|
sect_ele.find('a', class_='toc__anchor-links__link')
|
||||||
)
|
)
|
||||||
for article_ele in sect_ele.find_all("article"):
|
for article_ele in sect_ele.find_all('article'):
|
||||||
title_ele = article_ele.find("h1")
|
title_ele = article_ele.find('h1')
|
||||||
sub_title_ele = article_ele.find(class_="toc-article__subtitle")
|
sub_title_ele = article_ele.find(class_='toc-article__subtitle')
|
||||||
articles.setdefault(section_name, []).append(
|
articles.setdefault(section_name, []).append(
|
||||||
{
|
{
|
||||||
"title": self.tag_to_string(title_ele),
|
'title': self.tag_to_string(title_ele),
|
||||||
"url": article_ele.find("a", class_="toc-article__link")[
|
'url': article_ele.find('a', class_='toc-article__link')[
|
||||||
"href"
|
'href'
|
||||||
],
|
],
|
||||||
"description": self.tag_to_string(sub_title_ele)
|
'description': self.tag_to_string(sub_title_ele)
|
||||||
if sub_title_ele
|
if sub_title_ele
|
||||||
else "",
|
else '',
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return articles.items()
|
return articles.items()
|
||||||
|
@ -22,9 +22,9 @@ class Borsen_dk(BasicNewsRecipe):
|
|||||||
language = 'da'
|
language = 'da'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name="h1", attrs={'itemprop': 'headline'}),
|
dict(name='h1', attrs={'itemprop': 'headline'}),
|
||||||
dict(name="div", attrs={'itemprob': 'datePublished'}),
|
dict(name='div', attrs={'itemprob': 'datePublished'}),
|
||||||
dict(name="div", attrs={'itemprop': 'articleBody'}),
|
dict(name='div', attrs={'itemprop': 'articleBody'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Feed are found here:
|
# Feed are found here:
|
||||||
|
@ -42,24 +42,24 @@ def class_startswith(*prefixes):
|
|||||||
|
|
||||||
# From: https://www3.bostonglobe.com/lifestyle/comics?arc404=true
|
# From: https://www3.bostonglobe.com/lifestyle/comics?arc404=true
|
||||||
comics_to_fetch = {
|
comics_to_fetch = {
|
||||||
"ADAM@HOME": 'ad',
|
'ADAM@HOME': 'ad',
|
||||||
"ARLO & JANIS": 'aj',
|
'ARLO & JANIS': 'aj',
|
||||||
# "CUL DE SAC": 'cds',
|
# "CUL DE SAC": 'cds',
|
||||||
# "CURTIS": 'kfcrt',
|
# "CURTIS": 'kfcrt',
|
||||||
"DILBERT": 'dt',
|
'DILBERT': 'dt',
|
||||||
"DOONESBURY": 'db',
|
'DOONESBURY': 'db',
|
||||||
"DUSTIN": 'kfdus',
|
'DUSTIN': 'kfdus',
|
||||||
"F MINUS": 'fm',
|
'F MINUS': 'fm',
|
||||||
"FOR BETTER OR WORSE": 'fb',
|
'FOR BETTER OR WORSE': 'fb',
|
||||||
# "GET FUZZY": 'gz',
|
# "GET FUZZY": 'gz',
|
||||||
# "MOTHER GOOSE & GRIMM": 'tmmgg',
|
# "MOTHER GOOSE & GRIMM": 'tmmgg',
|
||||||
# "JUMPSTART": 'jt',
|
# "JUMPSTART": 'jt',
|
||||||
"MONTY": 'mt',
|
'MONTY': 'mt',
|
||||||
# "POOCH CAFE",
|
# "POOCH CAFE",
|
||||||
"RHYMES WITH ORANGE": 'kfrwo',
|
'RHYMES WITH ORANGE': 'kfrwo',
|
||||||
# "ROSE IS ROSE": 'rr',
|
# "ROSE IS ROSE": 'rr',
|
||||||
# "ZIPPY THE PINHEAD": 'kfzpy',
|
# "ZIPPY THE PINHEAD": 'kfzpy',
|
||||||
"ZITS": 'kfzt'
|
'ZITS': 'kfzt'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -77,10 +77,10 @@ def extract_json(raw_html):
|
|||||||
|
|
||||||
|
|
||||||
def absolutize_url(url):
|
def absolutize_url(url):
|
||||||
if url.startswith("//"):
|
if url.startswith('//'):
|
||||||
return "https:" + url
|
return 'https:' + url
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = "https://www.bostonglobe.com" + url
|
url = 'https://www.bostonglobe.com' + url
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
@ -120,7 +120,7 @@ def main():
|
|||||||
|
|
||||||
class BostonGlobeSubscription(BasicNewsRecipe):
|
class BostonGlobeSubscription(BasicNewsRecipe):
|
||||||
|
|
||||||
title = "Boston Globe"
|
title = 'Boston Globe'
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
description = 'The Boston Globe'
|
description = 'The Boston Globe'
|
||||||
language = 'en_US'
|
language = 'en_US'
|
||||||
|
@ -12,6 +12,7 @@ def class_as_string(x):
|
|||||||
x = ' '.join(x)
|
x = ' '.join(x)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
|
||||||
def class_startswith(*prefixes):
|
def class_startswith(*prefixes):
|
||||||
|
|
||||||
def q(x):
|
def q(x):
|
||||||
@ -24,18 +25,19 @@ def class_startswith(*prefixes):
|
|||||||
|
|
||||||
return dict(attrs={'class': q})
|
return dict(attrs={'class': q})
|
||||||
|
|
||||||
|
|
||||||
def absolutize_url(url):
|
def absolutize_url(url):
|
||||||
if url.startswith("//"):
|
if url.startswith('//'):
|
||||||
return "https:" + url
|
return 'https:' + url
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = "https://www.bostonglobe.com" + url
|
url = 'https://www.bostonglobe.com' + url
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
class BostonGlobePrint(BasicNewsRecipe):
|
class BostonGlobePrint(BasicNewsRecipe):
|
||||||
title = "Boston Globe | Print Edition"
|
title = 'Boston Globe | Print Edition'
|
||||||
__author__ = 'Kovid Goyal, unkn0wn'
|
__author__ = 'Kovid Goyal, unkn0wn'
|
||||||
description = 'The Boston Globe - Today\'s Paper'
|
description = "The Boston Globe - Today's Paper"
|
||||||
language = 'en_US'
|
language = 'en_US'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
@ -70,7 +72,7 @@ class BostonGlobePrint(BasicNewsRecipe):
|
|||||||
for image in soup.findAll('img', src=True):
|
for image in soup.findAll('img', src=True):
|
||||||
if image['src'].endswith('750.jpg'):
|
if image['src'].endswith('750.jpg'):
|
||||||
return 'https:' + image['src']
|
return 'https:' + image['src']
|
||||||
self.log("\nCover unavailable")
|
self.log('\nCover unavailable')
|
||||||
cover = None
|
cover = None
|
||||||
return cover
|
return cover
|
||||||
|
|
||||||
@ -94,8 +96,8 @@ class BostonGlobePrint(BasicNewsRecipe):
|
|||||||
desc = self.tag_to_string(d)
|
desc = self.tag_to_string(d)
|
||||||
|
|
||||||
self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url)
|
self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url)
|
||||||
feeds_dict[section].append({"title": title, "url": url, "description": desc})
|
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
|
||||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
return list(feeds_dict.items())
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw_html, url):
|
def preprocess_raw_html(self, raw_html, url):
|
||||||
soup = self.index_to_soup(raw_html)
|
soup = self.index_to_soup(raw_html)
|
||||||
|
@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class AdvancedUserRecipe1467715002(BasicNewsRecipe):
|
class AdvancedUserRecipe1467715002(BasicNewsRecipe):
|
||||||
title = 'Breaking Mad'
|
title = 'Breaking Mad'
|
||||||
__author__ = 'bugmen00t'
|
__author__ = 'bugmen00t'
|
||||||
description = 'Breaking Mad - пока смерть не насмешит нас. Breakingmad - это агрегатор самых странных и драматичных новостей с жизнеутверждающим эффектом. Breakingmad - это все, что вы должны знать о России и не только.' # noqa
|
description = 'Breaking Mad - пока смерть не насмешит нас. Breakingmad - это агрегатор самых странных и драматичных новостей с жизнеутверждающим эффектом. Breakingmad - это все, что вы должны знать о России и не только.' # noqa: E501
|
||||||
publisher = 'BreakingMad'
|
publisher = 'BreakingMad'
|
||||||
category = 'news'
|
category = 'news'
|
||||||
cover_url = u'http://breakingmad.me/images/logo.png'
|
cover_url = u'http://breakingmad.me/images/logo.png'
|
||||||
|
@ -5,7 +5,6 @@ from __future__ import print_function
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
@ -23,40 +22,39 @@ class brewiarz(BasicNewsRecipe):
|
|||||||
next_days = 1
|
next_days = 1
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
dec2rom_dict = {"01": "i", "02": "ii", "03": "iii", "04": "iv",
|
dec2rom_dict = {'01': 'i', '02': 'ii', '03': 'iii', '04': 'iv',
|
||||||
"05": "v", "06": "vi", "07": "vii", "08": "viii",
|
'05': 'v', '06': 'vi', '07': 'vii', '08': 'viii',
|
||||||
"09": "ix", "10": "x", "11": "xi", "12": "xii"}
|
'09': 'ix', '10': 'x', '11': 'xi', '12': 'xii'}
|
||||||
|
|
||||||
weekday_dict = {"Sunday": "Niedziela", "Monday": "Poniedziałek", "Tuesday": "Wtorek",
|
weekday_dict = {'Sunday': 'Niedziela', 'Monday': 'Poniedziałek', 'Tuesday': 'Wtorek',
|
||||||
"Wednesday": "Środa", "Thursday": "Czwartek", "Friday": "Piątek", "Saturday": "Sobota"}
|
'Wednesday': 'Środa', 'Thursday': 'Czwartek', 'Friday': 'Piątek', 'Saturday': 'Sobota'}
|
||||||
|
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
|
|
||||||
feeds = []
|
feeds = []
|
||||||
for i in range(0, self.next_days):
|
for i in range(self.next_days):
|
||||||
url_date = now + datetime.timedelta(days=i)
|
url_date = now + datetime.timedelta(days=i)
|
||||||
url_date_month = url_date.strftime("%m")
|
url_date_month = url_date.strftime('%m')
|
||||||
url_date_month_roman = dec2rom_dict[url_date_month]
|
url_date_month_roman = dec2rom_dict[url_date_month]
|
||||||
url_date_day = url_date.strftime("%d")
|
url_date_day = url_date.strftime('%d')
|
||||||
url_date_year = url_date.strftime("%Y")[2:]
|
url_date_year = url_date.strftime('%Y')[2:]
|
||||||
url_date_weekday = url_date.strftime("%A")
|
url_date_weekday = url_date.strftime('%A')
|
||||||
url_date_weekday_pl = weekday_dict[url_date_weekday]
|
url_date_weekday_pl = weekday_dict[url_date_weekday]
|
||||||
|
|
||||||
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + \
|
url = ('http://brewiarz.pl/' + url_date_month_roman + '_' +
|
||||||
url_date_year + "/" + url_date_day + url_date_month + "/index.php3"
|
url_date_year + '/' + url_date_day + url_date_month + '/index.php3')
|
||||||
articles = self.parse_pages(url)
|
articles = self.parse_pages(url)
|
||||||
if articles:
|
if articles:
|
||||||
title = url_date_weekday_pl + " " + url_date_day + \
|
title = (url_date_weekday_pl + ' ' + url_date_day +
|
||||||
"." + url_date_month + "." + url_date_year
|
'.' + url_date_month + '.' + url_date_year)
|
||||||
feeds.append((title, articles))
|
feeds.append((title, articles))
|
||||||
else:
|
else:
|
||||||
sectors = self.get_sectors(url)
|
sectors = self.get_sectors(url)
|
||||||
for subpage in sectors:
|
for subpage in sectors:
|
||||||
title = url_date_weekday_pl + " " + url_date_day + "." + \
|
title = (url_date_weekday_pl + ' ' + url_date_day + '.' +
|
||||||
url_date_month + "." + url_date_year + " - " + subpage.string
|
url_date_month + '.' + url_date_year + ' - ' + subpage.string)
|
||||||
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + \
|
url = ('http://brewiarz.pl/' + url_date_month_roman + '_' + url_date_year +
|
||||||
"/" + url_date_day + url_date_month + \
|
'/' + url_date_day + url_date_month + '/' + subpage['href'])
|
||||||
"/" + subpage['href']
|
|
||||||
print(url)
|
print(url)
|
||||||
articles = self.parse_pages(url)
|
articles = self.parse_pages(url)
|
||||||
if articles:
|
if articles:
|
||||||
@ -91,9 +89,8 @@ class brewiarz(BasicNewsRecipe):
|
|||||||
sublinks = ol.findAll(name='a')
|
sublinks = ol.findAll(name='a')
|
||||||
for sublink in sublinks:
|
for sublink in sublinks:
|
||||||
link_title = self.tag_to_string(
|
link_title = self.tag_to_string(
|
||||||
link) + " - " + self.tag_to_string(sublink)
|
link) + ' - ' + self.tag_to_string(sublink)
|
||||||
link_url_print = re.sub(
|
link_url_print = sublink['href'].replace('php3', 'php3?kr=_druk&wr=lg&')
|
||||||
'php3', 'php3?kr=_druk&wr=lg&', sublink['href'])
|
|
||||||
link_url = url[:-10] + link_url_print
|
link_url = url[:-10] + link_url_print
|
||||||
current_articles.append({'title': link_title,
|
current_articles.append({'title': link_title,
|
||||||
'url': link_url, 'description': '', 'date': ''})
|
'url': link_url, 'description': '', 'date': ''})
|
||||||
@ -102,8 +99,7 @@ class brewiarz(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
link_title = self.tag_to_string(link)
|
link_title = self.tag_to_string(link)
|
||||||
link_url_print = re.sub(
|
link_url_print = link['href'].replace('php3', 'php3?kr=_druk&wr=lg&')
|
||||||
'php3', 'php3?kr=_druk&wr=lg&', link['href'])
|
|
||||||
link_url = url[:-10] + link_url_print
|
link_url = url[:-10] + link_url_print
|
||||||
current_articles.append({'title': link_title,
|
current_articles.append({'title': link_title,
|
||||||
'url': link_url, 'description': '', 'date': ''})
|
'url': link_url, 'description': '', 'date': ''})
|
||||||
@ -145,7 +141,7 @@ class brewiarz(BasicNewsRecipe):
|
|||||||
if x == tag:
|
if x == tag:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
print("Can't find", tag, "in", tag.parent)
|
print("Can't find", tag, 'in', tag.parent)
|
||||||
continue
|
continue
|
||||||
for r in reversed(tag.contents):
|
for r in reversed(tag.contents):
|
||||||
tag.parent.insert(i, r)
|
tag.parent.insert(i, r)
|
||||||
|
@ -16,7 +16,7 @@ class AdvancedUserRecipe(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
|
cover_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
|
||||||
masthead_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
|
masthead_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
|
||||||
|
|
||||||
remove_tags = [dict(attrs={'class': ['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}), # noqa
|
remove_tags = [dict(attrs={'class': ['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}), # noqa: E501
|
||||||
dict(id=['header', 'artTools', 'context', 'interact',
|
dict(id=['header', 'artTools', 'context', 'interact',
|
||||||
'footer-navigation', 'bwNet', 'copy', 'keyboardNavigationHint']),
|
'footer-navigation', 'bwNet', 'copy', 'keyboardNavigationHint']),
|
||||||
dict(name=['hjtrs', 'kud'])]
|
dict(name=['hjtrs', 'kud'])]
|
||||||
|
@ -22,10 +22,10 @@ class Business_insider(BasicNewsRecipe):
|
|||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
masthead_url = 'http://static.businessinsider.com/assets/images/logos/tbi_print.jpg'
|
masthead_url = 'http://static.businessinsider.com/assets/images/logos/tbi_print.jpg'
|
||||||
extra_css = """
|
extra_css = '''
|
||||||
body{font-family: Arial,Helvetica,sans-serif }
|
body{font-family: Arial,Helvetica,sans-serif }
|
||||||
img{margin-bottom: 0.4em; display:block}
|
img{margin-bottom: 0.4em; display:block}
|
||||||
"""
|
'''
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
|
@ -15,8 +15,6 @@ class BusinessStandardPrint(BasicNewsRecipe):
|
|||||||
language = 'en_IN'
|
language = 'en_IN'
|
||||||
masthead_url = 'https://bsmedia.business-standard.com/include/_mod/site/html5/images/business-standard-logo.png'
|
masthead_url = 'https://bsmedia.business-standard.com/include/_mod/site/html5/images/business-standard-logo.png'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
resolve_internal_links = True
|
|
||||||
remove_empty_feeds = True
|
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
@ -64,7 +62,7 @@ class BusinessStandardPrint(BasicNewsRecipe):
|
|||||||
if dt.weekday() == 6:
|
if dt.weekday() == 6:
|
||||||
self.log.warn(
|
self.log.warn(
|
||||||
'Business Standard Does Not Have A Print Publication On Sunday. The Reports'
|
'Business Standard Does Not Have A Print Publication On Sunday. The Reports'
|
||||||
' And Columns On This Page Today Appeared In The Newspaper\'s Saturday Edition.'
|
" And Columns On This Page Today Appeared In The Newspaper's Saturday Edition."
|
||||||
)
|
)
|
||||||
url = 'https://apibs.business-standard.com/category/today-paper?sortBy=' + today
|
url = 'https://apibs.business-standard.com/category/today-paper?sortBy=' + today
|
||||||
raw = self.index_to_soup(url, raw=True)
|
raw = self.index_to_soup(url, raw=True)
|
||||||
|
@ -90,7 +90,7 @@ class BT(BasicNewsRecipe):
|
|||||||
|
|
||||||
# Insert feeds in specified order, if available
|
# Insert feeds in specified order, if available
|
||||||
|
|
||||||
feedSort = ['Editor\'s Note', 'Editors note']
|
feedSort = ["Editor's Note", 'Editors note']
|
||||||
for i in feedSort:
|
for i in feedSort:
|
||||||
if i in sections:
|
if i in sections:
|
||||||
feeds.append((i, sections[i]))
|
feeds.append((i, sections[i]))
|
||||||
@ -98,8 +98,7 @@ class BT(BasicNewsRecipe):
|
|||||||
# Done with the sorted feeds
|
# Done with the sorted feeds
|
||||||
|
|
||||||
for i in feedSort:
|
for i in feedSort:
|
||||||
if i in sections:
|
sections.pop(i, None)
|
||||||
del sections[i]
|
|
||||||
|
|
||||||
# Append what is left over...
|
# Append what is left over...
|
||||||
|
|
||||||
|
@ -5,8 +5,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
|
|
||||||
class CACM(BasicNewsRecipe):
|
class CACM(BasicNewsRecipe):
|
||||||
title = "ACM CACM Magazine"
|
title = 'ACM CACM Magazine'
|
||||||
description = "Published on day 1 of every month."
|
description = 'Published on day 1 of every month.'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
@ -17,16 +17,16 @@ class CACM(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
"""
|
'''
|
||||||
Parse out cover URL from cover page.
|
Parse out cover URL from cover page.
|
||||||
Example:
|
Example:
|
||||||
From: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.large.jpg?1647524668&1647524668
|
From: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.large.jpg?1647524668&1647524668
|
||||||
Get: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.jpg
|
Get: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.jpg
|
||||||
"""
|
'''
|
||||||
|
|
||||||
soup = self.index_to_soup("https://cacm.acm.org/")
|
soup = self.index_to_soup('https://cacm.acm.org/')
|
||||||
a_img = soup.find("a", class_="menuCover")
|
a_img = soup.find('a', class_='menuCover')
|
||||||
img_url = a_img.img["src"]
|
img_url = a_img.img['src']
|
||||||
img_url = img_url.split("?")[0]
|
img_url = img_url.split('?')[0]
|
||||||
img_url = img_url.replace(".large", "")
|
img_url = img_url.replace('.large', '')
|
||||||
return img_url
|
return img_url
|
||||||
|
@ -9,8 +9,7 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
|||||||
title = u'Calcalist'
|
title = u'Calcalist'
|
||||||
language = 'he'
|
language = 'he'
|
||||||
__author__ = 'marbs'
|
__author__ = 'marbs'
|
||||||
extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }' # noqa
|
extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }' # noqa: E501
|
||||||
simultaneous_downloads = 5
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
@ -23,34 +22,33 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
|||||||
dict(name='div', attrs={'class': 'ArticleBodyComponent'}),
|
dict(name='div', attrs={'class': 'ArticleBodyComponent'}),
|
||||||
]
|
]
|
||||||
remove_tags = [dict(name='p', attrs={'text': [' ']})]
|
remove_tags = [dict(name='p', attrs={'text': [' ']})]
|
||||||
max_articles_per_feed = 100
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'<p> </p>', re.DOTALL | re.IGNORECASE), lambda match: '')
|
(re.compile(r'<p> </p>', re.DOTALL | re.IGNORECASE), lambda match: '')
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u" דף הבית", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml"),
|
(u' דף הבית', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml'),
|
||||||
(u" 24/7", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml"),
|
(u' 24/7', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml'),
|
||||||
(u" באזז", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml"),
|
(u' באזז', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml'),
|
||||||
(u" משפט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml"),
|
(u' משפט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml'),
|
||||||
(u" רכב", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml"),
|
(u' רכב', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml'),
|
||||||
(u" אחריות וסביבה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml"),
|
(u' אחריות וסביבה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml'),
|
||||||
(u" דעות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml"),
|
(u' דעות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml'),
|
||||||
(u" תיירות ותעופה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml"),
|
(u' תיירות ותעופה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml'),
|
||||||
(u" קריירה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml"),
|
(u' קריירה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml'),
|
||||||
(u" אחד העם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml"),
|
(u' אחד העם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml'),
|
||||||
(u" המלצות ואזהרות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml"),
|
(u' המלצות ואזהרות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml'),
|
||||||
(u" הייטק והון סיכון", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml"),
|
(u' הייטק והון סיכון', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml'),
|
||||||
(u" חדשות טכנולוגיה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml"),
|
(u' חדשות טכנולוגיה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml'),
|
||||||
(u" תקשורת", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml"),
|
(u' תקשורת', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml'),
|
||||||
(u" אינטרנט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml"),
|
(u' אינטרנט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml'),
|
||||||
(u" מכשירים וגאדג'טים", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml"),
|
(u" מכשירים וגאדג'טים", u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml'),
|
||||||
(u" המדריך", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml"),
|
(u' המדריך', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml'),
|
||||||
(u" אפליקציות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml"),
|
(u' אפליקציות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml'),
|
||||||
(u" Play", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml"),
|
(u' Play', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml'),
|
||||||
(u" הכסף", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml"),
|
(u' הכסף', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml'),
|
||||||
(u" עולם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml"),
|
(u' עולם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml'),
|
||||||
(u" פרסום ושיווק", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml"),
|
(u' פרסום ושיווק', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml'),
|
||||||
(u" פנאי", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml"),
|
(u' פנאי', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml'),
|
||||||
(u" עסקי ספורט", u"http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml")
|
(u' עסקי ספורט', u'http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml')
|
||||||
]
|
]
|
||||||
|
@ -60,20 +60,20 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
# un-comment the following six lines for the Vancouver Province
|
# un-comment the following six lines for the Vancouver Province
|
||||||
# title = u'Vancouver Province'
|
# # title = u'Vancouver Province'
|
||||||
# url_prefix = 'http://www.theprovince.com'
|
# # url_prefix = 'http://www.theprovince.com'
|
||||||
# description = u'News from Vancouver, BC'
|
# # description = u'News from Vancouver, BC'
|
||||||
# std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
|
# # std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
|
||||||
# logo_url = 'vplogo.jpg'
|
# # logo_url = 'vplogo.jpg'
|
||||||
# fp_tag = 'CAN_TP'
|
# # fp_tag = 'CAN_TP'
|
||||||
|
|
||||||
# un-comment the following six lines for the Vancouver Sun
|
# un-comment the following six lines for the Vancouver Sun
|
||||||
# title = u'Vancouver Sun'
|
# # title = u'Vancouver Sun'
|
||||||
# url_prefix = 'http://www.vancouversun.com'
|
# # url_prefix = 'http://www.vancouversun.com'
|
||||||
# description = u'News from Vancouver, BC'
|
# # description = u'News from Vancouver, BC'
|
||||||
# std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
|
# # std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
|
||||||
# logo_url = 'vslogo.jpg'
|
# # logo_url = 'vslogo.jpg'
|
||||||
# fp_tag = 'CAN_VS'
|
# # fp_tag = 'CAN_VS'
|
||||||
|
|
||||||
# un-comment the following six lines for the Calgary Herald
|
# un-comment the following six lines for the Calgary Herald
|
||||||
title = u'Calgary Herald'
|
title = u'Calgary Herald'
|
||||||
@ -90,7 +90,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
# # std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
|
# # std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
|
||||||
# # logo_url = 'ejlogo.jpg'
|
# # logo_url = 'ejlogo.jpg'
|
||||||
# # fp_tag = 'CAN_EJ'
|
# # fp_tag = 'CAN_EJ'
|
||||||
#
|
|
||||||
# un-comment the following six lines for the Ottawa Citizen
|
# un-comment the following six lines for the Ottawa Citizen
|
||||||
# # title = u'Ottawa Citizen'
|
# # title = u'Ottawa Citizen'
|
||||||
# # url_prefix = 'http://www.ottawacitizen.com'
|
# # url_prefix = 'http://www.ottawacitizen.com'
|
||||||
@ -98,7 +98,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
# # std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
# # std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
||||||
# # logo_url = 'oclogo.jpg'
|
# # logo_url = 'oclogo.jpg'
|
||||||
# # fp_tag = 'CAN_OC'
|
# # fp_tag = 'CAN_OC'
|
||||||
#
|
|
||||||
# un-comment the following six lines for the Montreal Gazette
|
# un-comment the following six lines for the Montreal Gazette
|
||||||
# # title = u'Montreal Gazette'
|
# # title = u'Montreal Gazette'
|
||||||
# # url_prefix = 'http://www.montrealgazette.com'
|
# # url_prefix = 'http://www.montrealgazette.com'
|
||||||
@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
|
keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
|
||||||
|
|
||||||
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
|
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
|
||||||
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
|
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
|
||||||
@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
name='div', attrs={'class': 'copyright'}),
|
name='div', attrs={'class': 'copyright'}),
|
||||||
dict(name='div', attrs={'class': 'rule_grey_solid'}),
|
dict(name='div', attrs={'class': 'rule_grey_solid'}),
|
||||||
dict(name='div', attrs={'id': 'soundoff'}),
|
dict(name='div', attrs={'id': 'soundoff'}),
|
||||||
dict(name='div', attrs={'id': re.compile('flyer')}),
|
dict(name='div', attrs={'id': re.compile(r'flyer')}),
|
||||||
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
|
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
@ -154,8 +154,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
except:
|
except:
|
||||||
while daysback < 7:
|
while daysback < 7:
|
||||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg' + \
|
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg' + \
|
||||||
str((date.today() - timedelta(days=daysback)).day) + \
|
str((date.today() - timedelta(days=daysback)).day) + '/lg/' + self.fp_tag + '.jpg'
|
||||||
'/lg/' + self.fp_tag + '.jpg'
|
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
try:
|
try:
|
||||||
br.open(cover)
|
br.open(cover)
|
||||||
@ -164,24 +163,24 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
break
|
break
|
||||||
if daysback == 7:
|
if daysback == 7:
|
||||||
self.log("\nCover unavailable")
|
self.log('\nCover unavailable')
|
||||||
cover = None
|
cover = None
|
||||||
return cover
|
return cover
|
||||||
|
|
||||||
def fixChars(self, string):
|
def fixChars(self, string):
|
||||||
# Replace lsquo (\x91)
|
# Replace lsquo (\x91)
|
||||||
fixed = re.sub("\x91", "‘", string)
|
fixed = string.replace('\x91', '‘')
|
||||||
# Replace rsquo (\x92)
|
# Replace rsquo (\x92)
|
||||||
fixed = re.sub("\x92", "’", fixed)
|
fixed = fixed.replace('\x92', '’')
|
||||||
# Replace ldquo (\x93)
|
# Replace ldquo (\x93)
|
||||||
fixed = re.sub("\x93", "“", fixed)
|
fixed = fixed.replace('\x93', '“')
|
||||||
# Replace rdquo (\x94)
|
# Replace rdquo (\x94)
|
||||||
fixed = re.sub("\x94", "”", fixed)
|
fixed = fixed.replace('\x94', '”')
|
||||||
# Replace ndash (\x96)
|
# Replace ndash (\x96)
|
||||||
fixed = re.sub("\x96", "–", fixed)
|
fixed = fixed.replace('\x96', '–')
|
||||||
# Replace mdash (\x97)
|
# Replace mdash (\x97)
|
||||||
fixed = re.sub("\x97", "—", fixed)
|
fixed = fixed.replace('\x97', '—')
|
||||||
fixed = re.sub("’", "’", fixed)
|
fixed = fixed.replace('’', '’')
|
||||||
return fixed
|
return fixed
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
def massageNCXText(self, description):
|
||||||
@ -214,7 +213,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
divtags = soup.findAll('div', attrs={'id': ''})
|
divtags = soup.findAll('div', attrs={'id': ''})
|
||||||
if divtags:
|
if divtags:
|
||||||
for div in divtags:
|
for div in divtags:
|
||||||
del(div['id'])
|
del div['id']
|
||||||
|
|
||||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||||
if pgall is not None: # photo gallery perhaps
|
if pgall is not None: # photo gallery perhaps
|
||||||
@ -262,10 +261,10 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = self.url_prefix + url
|
url = self.url_prefix + url
|
||||||
if not url.startswith(self.url_prefix):
|
if not url.startswith(self.url_prefix):
|
||||||
print("Rejected " + url)
|
print('Rejected ' + url)
|
||||||
return
|
return
|
||||||
if url in self.url_list:
|
if url in self.url_list:
|
||||||
print("Rejected dup " + url)
|
print('Rejected dup ' + url)
|
||||||
return
|
return
|
||||||
self.url_list.append(url)
|
self.url_list.append(url)
|
||||||
title = self.tag_to_string(atag, False)
|
title = self.tag_to_string(atag, False)
|
||||||
@ -277,8 +276,8 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
return
|
return
|
||||||
dtag = adiv.find('div', 'content')
|
dtag = adiv.find('div', 'content')
|
||||||
description = ''
|
description = ''
|
||||||
print("URL " + url)
|
print('URL ' + url)
|
||||||
print("TITLE " + title)
|
print('TITLE ' + title)
|
||||||
if dtag is not None:
|
if dtag is not None:
|
||||||
stag = dtag.span
|
stag = dtag.span
|
||||||
if stag is not None:
|
if stag is not None:
|
||||||
@ -286,18 +285,18 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
description = self.tag_to_string(stag, False)
|
description = self.tag_to_string(stag, False)
|
||||||
else:
|
else:
|
||||||
description = self.tag_to_string(dtag, False)
|
description = self.tag_to_string(dtag, False)
|
||||||
print("DESCRIPTION: " + description)
|
print('DESCRIPTION: ' + description)
|
||||||
if key not in articles:
|
if key not in articles:
|
||||||
articles[key] = []
|
articles[key] = []
|
||||||
articles[key].append(dict(
|
articles[key].append(dict(
|
||||||
title=title, url=url, date='', description=description, author='', content=''))
|
title=title, url=url, date='', description=description, author='', content=''))
|
||||||
|
|
||||||
def parse_web_index(key, keyurl):
|
def parse_web_index(key, keyurl):
|
||||||
print("Section: " + key + ': ' + self.url_prefix + keyurl)
|
print('Section: ' + key + ': ' + self.url_prefix + keyurl)
|
||||||
try:
|
try:
|
||||||
soup = self.index_to_soup(self.url_prefix + keyurl)
|
soup = self.index_to_soup(self.url_prefix + keyurl)
|
||||||
except:
|
except:
|
||||||
print("Section: " + key + ' NOT FOUND')
|
print('Section: ' + key + ' NOT FOUND')
|
||||||
return
|
return
|
||||||
ans.append(key)
|
ans.append(key)
|
||||||
mainsoup = soup.find('div', 'bodywrapper')
|
mainsoup = soup.find('div', 'bodywrapper')
|
||||||
@ -309,7 +308,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
for wdiv in mainsoup.findAll(attrs={'class': ['headline', 'featurecontent']}):
|
for wdiv in mainsoup.findAll(attrs={'class': ['headline', 'featurecontent']}):
|
||||||
handle_article(wdiv, key)
|
handle_article(wdiv, key)
|
||||||
|
|
||||||
for (k, url) in self.postmedia_index_pages:
|
for k,url in self.postmedia_index_pages:
|
||||||
parse_web_index(k, url)
|
parse_web_index(k, url)
|
||||||
ans = [(key, articles[key]) for key in ans if key in articles]
|
ans = [(key, articles[key]) for key in ans if key in articles]
|
||||||
return ans
|
return ans
|
||||||
|
@ -4,7 +4,6 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class AdvancedUserRecipe1271446252(BasicNewsRecipe):
|
class AdvancedUserRecipe1271446252(BasicNewsRecipe):
|
||||||
title = u'CanardPC'
|
title = u'CanardPC'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
|
||||||
language = 'fr'
|
language = 'fr'
|
||||||
__author__ = 'zorgluf'
|
__author__ = 'zorgluf'
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user