Merge branch 'ruff-pep8-strict' of https://github.com/un-pogaz/calibre

Kovid Goyal 2025-01-25 13:56:37 +05:30
commit 7e61ea2248
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
1492 changed files with 19225 additions and 19853 deletions

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# vim:fileencoding=utf-8
 __license__ = 'GPL v3'
@@ -27,7 +26,7 @@ for name, src in sources.items():
     os.chdir(iconset)
     try:
         for sz in (16, 32, 128, 256, 512, 1024):
-            iname = 'icon_{0}x{0}.png'.format(sz)
+            iname = f'icon_{sz}x{sz}.png'
             iname2x = 'icon_{0}x{0}@2x.png'.format(sz // 2)
             if src.endswith('.svg'):
                 subprocess.check_call(['rsvg-convert', src, '-w', str(sz), '-h', str(sz), '-o', iname])
@@ -36,7 +35,7 @@ for name, src in sources.items():
                 if sz == 512:
                     shutil.copy2(src, iname)
                 else:
-                    subprocess.check_call(['convert', src, '-resize', '{0}x{0}'.format(sz), iname])
+                    subprocess.check_call(['convert', src, '-resize', f'{sz}x{sz}', iname])
             if sz > 16:
                 shutil.copy2(iname, iname2x)
             if sz > 512:

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# vim:fileencoding=utf-8
 __license__ = 'GPL v3'
@@ -24,7 +23,7 @@ for name, src in sources.items():
     try:
         names = []
         for sz in (16, 24, 32, 48, 64, 256):
-            iname = os.path.join('ico_temp', '{0}x{0}.png'.format(sz))
+            iname = os.path.join('ico_temp', f'{sz}x{sz}.png')
             subprocess.check_call(['rsvg-convert', src, '-w', str(sz), '-h', str(sz), '-o', iname])
             subprocess.check_call(['optipng', '-o7', '-strip', 'all', iname])
             if sz >= 128:

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>

 import argparse

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
@@ -27,7 +26,7 @@ def clone_node(node, parent):
 def merge():
     base = os.path.dirname(os.path.abspath(__file__))
     ans = etree.fromstring(
-        '<svg xmlns="%s" xmlns:xlink="%s"/>' % (SVG_NS, XLINK_NS),
+        f'<svg xmlns="{SVG_NS}" xmlns:xlink="{XLINK_NS}"/>',
         parser=etree.XMLParser(
             recover=True, no_network=True, resolve_entities=False
         )
@@ -43,14 +42,14 @@ def merge():
                 recover=True, no_network=True, resolve_entities=False
             )
         )
-        symbol = ans.makeelement('{%s}symbol' % SVG_NS)
+        symbol = ans.makeelement('{%s}symbol' % SVG_NS)  # noqa: UP031
         symbol.set('viewBox', svg.get('viewBox'))
         symbol.set('id', 'icon-' + f.rpartition('.')[0])
         for child in svg.iterchildren('*'):
             clone_node(child, symbol)
         ans.append(symbol)
     ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False)
-    ans = re.sub('<svg[^>]+>', '<svg style="display:none">', ans, count=1)
+    ans = re.sub(r'<svg[^>]+>', '<svg style="display:none">', ans, count=1)
     return ans

View File

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# vim:fileencoding=utf-8
 __license__ = 'GPL v3'

View File

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-#
 # calibre documentation build configuration file, created by
 # sphinx-quickstart.py on Sun Mar 23 01:23:55 2008.
 #
@@ -47,11 +45,11 @@ templates_path = ['templates']
 source_suffix = {'.rst': 'restructuredtext'}
 # The master toctree document.
-master_doc = 'index' if tags.has('online') else 'simple_index'  # noqa
+master_doc = 'index' if tags.has('online') else 'simple_index'  # noqa: F821
 # kill the warning about index/simple_index not being in a toctree
 exclude_patterns = ['simple_index.rst'] if master_doc == 'index' else ['index.rst']
 exclude_patterns.append('cli-options-header.rst')
-if tags.has('gettext'):  # noqa
+if tags.has('gettext'):  # noqa: F821
     # Do not exclude anything as the strings must be translated. This will
     # generate a warning about the documents not being in a toctree, just ignore
     # it.
@@ -64,7 +62,7 @@ language = os.environ.get('CALIBRE_OVERRIDE_LANG', 'en')
 def generated_langs():
     try:
         return os.listdir(os.path.join(base, 'generated'))
-    except EnvironmentError as e:
+    except OSError as e:
         if e.errno != errno.ENOENT:
             raise
     return ()
@@ -99,13 +97,13 @@ today_fmt = '%B %d, %Y'
 unused_docs = ['global', 'cli/global']
 locale_dirs = ['locale/']
-title = '%s User Manual' % __appname__
+title = f'{__appname__} User Manual'
 needs_localization = language not in {'en', 'eng'}
 if needs_localization:
     import gettext
     try:
         t = gettext.translation('simple_index', locale_dirs[0], [language])
-    except IOError:
+    except OSError:
         pass
     else:
         title = t.gettext(title)
@@ -176,7 +174,7 @@ def sort_languages(x):
     lc, name = x
     if lc == language:
         return ''
-    return sort_key(type(u'')(name))
+    return sort_key(str(name))
 website = 'https://calibre-ebook.com'
@@ -193,13 +191,13 @@ extlinks = {
 }
 del sort_languages, get_language
-epub_author = u'Kovid Goyal'
-epub_publisher = u'Kovid Goyal'
-epub_copyright = u'© {} Kovid Goyal'.format(date.today().year)
-epub_description = u'Comprehensive documentation for calibre'
-epub_identifier = u'https://manual.calibre-ebook.com'
-epub_scheme = u'url'
-epub_uid = u'S54a88f8e9d42455e9c6db000e989225f'
+epub_author = 'Kovid Goyal'
+epub_publisher = 'Kovid Goyal'
+epub_copyright = f'© {date.today().year} Kovid Goyal'
+epub_description = 'Comprehensive documentation for calibre'
+epub_identifier = 'https://manual.calibre-ebook.com'
+epub_scheme = 'url'
+epub_uid = 'S54a88f8e9d42455e9c6db000e989225f'
 epub_tocdepth = 4
 epub_tocdup = True
 epub_cover = ('epub_cover.jpg', 'epub_cover_template.html')
@@ -255,5 +253,5 @@ latex_show_pagerefs = True
 latex_show_urls = 'footnote'
 latex_elements = {
     'papersize':'letterpaper',
-    'preamble': r'\renewcommand{\pageautorefname}{%s}' % _('page'),
+    'preamble': r'\renewcommand{\pageautorefname}{%s}' % _('page'),  # noqa: UP031
 }

View File

@@ -33,7 +33,7 @@ def formatter_funcs():
     ans = {'doc': {}, 'sum': {}}
     with TemporaryDirectory() as tdir:
         db = LibraryDatabase(tdir)  # needed to load formatter_funcs
         ffml = FFMLProcessor()
         all_funcs = formatter_functions().get_builtins()
         for func_name, func in all_funcs.items():
@@ -195,13 +195,13 @@ details and examples.
     lines = []
     for cmd in COMMANDS:
         parser = option_parser_for(cmd)()
-        lines += ['.. _calibredb-%s-%s:' % (language, cmd), '']
+        lines += [f'.. _calibredb-{language}-{cmd}:', '']
         lines += [cmd, '~'*20, '']
         usage = parser.usage.strip()
-        usage = [i for i in usage.replace('%prog', 'calibredb').splitlines()]
+        usage = usage.replace('%prog', 'calibredb').splitlines()
         cmdline = ' '+usage[0]
         usage = usage[1:]
-        usage = [re.sub(r'(%s)([^a-zA-Z0-9])'%cmd, r':command:`\1`\2', i) for i in usage]
+        usage = [re.sub(rf'({cmd})([^a-zA-Z0-9])', r':command:`\1`\2', i) for i in usage]
         lines += ['.. code-block:: none', '', cmdline, '']
         lines += usage
         groups = [(None, None, parser.option_list)]
@@ -240,14 +240,14 @@ def generate_ebook_convert_help(preamble, app):
         parser, plumber = create_option_parser(['ebook-convert',
             'dummyi.'+sorted(pl.file_types)[0], 'dummyo.epub', '-h'], default_log)
         groups = [(pl.name+ ' Options', '', g.option_list) for g in
-                  parser.option_groups if g.title == "INPUT OPTIONS"]
+                  parser.option_groups if g.title == 'INPUT OPTIONS']
         prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
         raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
     for pl in sorted(output_format_plugins(), key=lambda x: x.name):
         parser, plumber = create_option_parser(['ebook-convert', 'd.epub',
             'dummyi.'+pl.file_type, '-h'], default_log)
         groups = [(pl.name+ ' Options', '', g.option_list) for g in
-                  parser.option_groups if g.title == "OUTPUT OPTIONS"]
+                  parser.option_groups if g.title == 'OUTPUT OPTIONS']
         prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
         raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
@@ -257,7 +257,7 @@ def generate_ebook_convert_help(preamble, app):
 def update_cli_doc(name, raw, language):
     if isinstance(raw, bytes):
         raw = raw.decode('utf-8')
-    path = 'generated/%s/%s.rst' % (language, name)
+    path = f'generated/{language}/{name}.rst'
     old_raw = open(path, encoding='utf-8').read() if os.path.exists(path) else ''
     if not os.path.exists(path) or old_raw != raw:
         import difflib
@@ -352,7 +352,7 @@ def cli_docs(language):
     usage = [mark_options(i) for i in parser.usage.replace('%prog', cmd).splitlines()]
     cmdline = usage[0]
     usage = usage[1:]
-    usage = [i.replace(cmd, ':command:`%s`'%cmd) for i in usage]
+    usage = [i.replace(cmd, f':command:`{cmd}`') for i in usage]
     usage = '\n'.join(usage)
     preamble = CLI_PREAMBLE.format(cmd=cmd, cmdref=cmd + '-' + language, cmdline=cmdline, usage=usage)
     if cmd == 'ebook-convert':
@@ -382,7 +382,7 @@ def template_docs(language):
 def localized_path(app, langcode, pagename):
     href = app.builder.get_target_uri(pagename)
-    href = re.sub(r'generated/[a-z]+/', 'generated/%s/' % langcode, href)
+    href = re.sub(r'generated/[a-z]+/', f'generated/{langcode}/', href)
     prefix = '/'
     if langcode != 'en':
         prefix += langcode + '/'
@@ -397,7 +397,7 @@ def add_html_context(app, pagename, templatename, context, *args):
 def guilabel_role(typ, rawtext, text, *args, **kwargs):
     from sphinx.roles import GUILabel
-    text = text.replace(u'->', u'\N{THIN SPACE}\N{RIGHTWARDS ARROW}\N{THIN SPACE}')
+    text = text.replace('->', '\N{THIN SPACE}\N{RIGHTWARDS ARROW}\N{THIN SPACE}')
     return GUILabel()(typ, rawtext, text, *args, **kwargs)
@@ -405,7 +405,7 @@ def setup_man_pages(app):
     documented_cmds = get_cli_docs()[0]
     man_pages = []
     for cmd, option_parser in documented_cmds:
-        path = 'generated/%s/%s' % (app.config.language, cmd)
+        path = f'generated/{app.config.language}/{cmd}'
         man_pages.append((
             path, cmd, cmd, 'Kovid Goyal', 1
         ))

View File

@@ -49,8 +49,8 @@ class EPUBHelpBuilder(EpubBuilder):
             imgname = container.href_to_name(img.get('src'), name)
             fmt, width, height = identify(container.raw_data(imgname))
             if width == -1:
-                raise ValueError('Failed to read size of: %s' % imgname)
-            img.set('style', 'width: %dpx; height: %dpx' % (width, height))
+                raise ValueError(f'Failed to read size of: {imgname}')
+            img.set('style', f'width: {width}px; height: {height}px')
     def fix_opf(self, container):
         spine_names = {n for n, l in container.spine_names}
@@ -75,7 +75,7 @@ class EPUBHelpBuilder(EpubBuilder):
         # Ensure that the cover-image property is set
         cover_id = rmap['_static/' + self.config.epub_cover[0]]
-        for item in container.opf_xpath('//opf:item[@id="{}"]'.format(cover_id)):
+        for item in container.opf_xpath(f'//opf:item[@id="{cover_id}"]'):
             item.set('properties', 'cover-image')
         for item in container.opf_xpath('//opf:item[@href="epub-cover.xhtml"]'):
             item.set('properties', 'svg calibre:title-page')

View File

@@ -32,7 +32,7 @@ class DemoTool(Tool):
     def create_action(self, for_toolbar=True):
         # Create an action, this will be added to the plugins toolbar and
        # the plugins menu
-        ac = QAction(get_icons('images/icon.png'), 'Magnify fonts', self.gui)  # noqa
+        ac = QAction(get_icons('images/icon.png'), 'Magnify fonts', self.gui)  # noqa: F821
         if not for_toolbar:
             # Register a keyboard shortcut for this toolbar action. We only
             # register it for the action created for the menu, not the toolbar,

View File

@@ -13,13 +13,13 @@ from calibre.customize import FileTypePlugin
 class HelloWorld(FileTypePlugin):
     name = 'Hello World Plugin'  # Name of the plugin
     description = 'Set the publisher to Hello World for all new conversions'
     supported_platforms = ['windows', 'osx', 'linux']  # Platforms this plugin will run on
     author = 'Acme Inc.'  # The author of this plugin
     version = (1, 0, 0)  # The version number of this plugin
     file_types = {'epub', 'mobi'}  # The file types that this plugin will be applied to
     on_postprocess = True  # Run this plugin after conversion is complete
     minimum_calibre_version = (0, 7, 53)
     def run(self, path_to_ebook):

View File

@@ -76,5 +76,3 @@ class InterfacePluginDemo(InterfaceActionBase):
         ac = self.actual_plugin_
         if ac is not None:
             ac.apply_settings()
-
-

View File

@@ -55,7 +55,7 @@ class DemoDialog(QDialog):
         self.l.addWidget(self.view_button)
         self.update_metadata_button = QPushButton(
-            'Update metadata in a book\'s files', self)
+            "Update metadata in a book's files", self)
         self.update_metadata_button.clicked.connect(self.update_metadata)
         self.l.addWidget(self.update_metadata_button)

View File

@@ -54,8 +54,8 @@ class checkbox(nodes.Element):
 def visit_checkbox(self, node):
     cid = node['ids'][0]
     node['classes'] = []
-    self.body.append('<input id="{0}" type="checkbox" />'
-                     '<label for="{0}">&nbsp;</label>'.format(cid))
+    self.body.append(f'<input id="{cid}" type="checkbox" />'
+                     f'<label for="{cid}">&nbsp;</label>')
 def modify_li(li):
@@ -66,7 +66,7 @@ def modify_li(li):
         li['classes'].append('leaf-node')
     else:
         c = checkbox()
-        c['ids'] = ['collapse-checkbox-{}'.format(next(id_counter))]
+        c['ids'] = [f'collapse-checkbox-{next(id_counter)}']
         li.insert(0, c)

View File

@@ -61,7 +61,7 @@ def generate_template_language_help(language, log):
     a = output.append
     with TemporaryDirectory() as tdir:
         db = LibraryDatabase(tdir)  # needed to load formatter_funcs
         ffml = FFMLProcessor()
         all_funcs = formatter_functions().get_builtins()
         categories = defaultdict(dict)
@@ -89,5 +89,6 @@ def generate_template_language_help(language, log):
     a(POSTAMBLE)
     return ''.join(output)

+
 if __name__ == '__main__':
     generate_template_language_help()

View File

@@ -21,10 +21,11 @@ quote-style = 'single'
 [tool.ruff.lint]
 ignore = ['E402', 'E722', 'E741']
-select = ['E', 'F', 'I', 'W', 'INT']
+select = ['E', 'F', 'I', 'W', 'INT', 'PIE794']
+unfixable = ['PIE794']

 [tool.ruff.lint.per-file-ignores]
-"src/calibre/ebooks/unihandecode/*codepoints.py" = ['E501', 'W191']
+"src/calibre/ebooks/unihandecode/*codepoints.py" = ['E501']
 "src/qt/*.py" = ['I']
 "src/qt/*.pyi" = ['I']

View File

@@ -17,6 +17,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 use_archive = True

+
 def E(parent, name, text='', **attrs):
     ans = parent.makeelement(name, **attrs)
     ans.text = text
@@ -60,8 +61,8 @@ if use_archive:
         data = json.loads(raw)
         body = root.xpath('//body')[0]
         article = E(body, 'article')
-        E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
-        E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
+        E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
+        E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
         E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
         try:
             date = data['dateModified']
@@ -95,7 +96,7 @@ else:
         for child in tuple(body):
             body.remove(child)
         article = E(body, 'article')
-        E(article, 'div', replace_entities(data['subheadline']) , style='color: red; font-size:small; font-weight:bold;')
+        E(article, 'div', replace_entities(data['subheadline']), style='color: red; font-size:small; font-weight:bold;')
         E(article, 'h1', replace_entities(data['headline']))
         E(article, 'div', replace_entities(data['description']), style='font-style: italic; color:#202020;')
         if data['dateline'] is None:
@@ -157,7 +158,7 @@ class Economist(BasicNewsRecipe):
     encoding = 'utf-8'
     masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
-    __author__ = "Kovid Goyal"
+    __author__ = 'Kovid Goyal'
     description = (
         'Published since September 1843 to take part in “a severe contest between intelligence, which presses forward, and '
         'an unworthy, timid ignorance obstructing our progress.”'
@@ -170,7 +171,7 @@ class Economist(BasicNewsRecipe):
     resolve_internal_links = True
     remove_tags = [
         dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
-        dict(attrs={'aria-label': "Article Teaser"}),
+        dict(attrs={'aria-label': 'Article Teaser'}),
         dict(attrs={
             'class': [
                 'dblClkTrk', 'ec-article-info', 'share_inline_header',
@@ -224,13 +225,13 @@ class Economist(BasicNewsRecipe):
         def parse_index(self):
             # return self.economist_test_article()
             soup = self.index_to_soup('https://www.economist.com/hidden-content/1843magazine-hub')
-            script_tag = soup.find("script", id="__NEXT_DATA__")
+            script_tag = soup.find('script', id='__NEXT_DATA__')
             if script_tag is None:
                 raise ValueError('No script tag with JSON data found in the weeklyedition archive')
             data = json.loads(script_tag.string)
             content_id = data['props']['pageProps']['content'][0]['tegID'].split('/')[-1]
             query = {
-                'query': 'query HubsDataQuery($id:String!$size:Int!){canonical(ref:$id){id headline description url{canonical __typename}image{ident{url{canonical __typename}width height __typename}__typename}text(mode:"hub" format:"json")hasPart(size:$size){parts{id title:headline isPartOf{context{title:headline __typename}__typename}hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}__typename}}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}',  # noqa
+                'query': 'query HubsDataQuery($id:String!$size:Int!){canonical(ref:$id){id headline description url{canonical __typename}image{ident{url{canonical __typename}width height __typename}__typename}text(mode:"hub" format:"json")hasPart(size:$size){parts{id title:headline isPartOf{context{title:headline __typename}__typename}hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}__typename}}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}',  # noqa: E501
                 'operationName': 'HubsDataQuery',
                 'variables': '{{"id":"/content/{}","size":40}}'.format(content_id),
             }
@@ -247,22 +248,22 @@ class Economist(BasicNewsRecipe):
             self.description = data['description']
             feeds_dict = defaultdict(list)
-            for part in safe_dict(data, "hasPart", "parts"):
+            for part in safe_dict(data, 'hasPart', 'parts'):
                 section = part['title']
                 self.log(section)
-                for art in safe_dict(part, "hasPart", "parts"):
-                    title = safe_dict(art, "title")
-                    desc = safe_dict(art, "rubric") or ''
-                    sub = safe_dict(art, "flyTitle") or ''
+                for art in safe_dict(part, 'hasPart', 'parts'):
+                    title = safe_dict(art, 'title')
+                    desc = safe_dict(art, 'rubric') or ''
+                    sub = safe_dict(art, 'flyTitle') or ''
                     if sub and section != sub:
                         desc = sub + ' :: ' + desc
                     pt = PersistentTemporaryFile('.html')
                     pt.write(json.dumps(art).encode('utf-8'))
                     pt.close()
                     url = 'file:///' + pt.name
-                    feeds_dict[section].append({"title": title, "url": url, "description": desc})
+                    feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
                     self.log('\t', title, '\n\t\t', desc)
-            return [(section, articles) for section, articles in feeds_dict.items()]
+            return list(feeds_dict.items())
         def populate_article_metadata(self, article, soup, first):
             article.url = soup.find('h1')['title']
@@ -273,7 +274,7 @@ class Economist(BasicNewsRecipe):
                     'economist.com/cdn-cgi/image/width=600,quality=80,format=auto/')
             return soup
     else:  # Load articles from individual article pages {{{
         def __init__(self, *args, **kwargs):
             BasicNewsRecipe.__init__(self, *args, **kwargs)
@@ -311,26 +312,26 @@ class Economist(BasicNewsRecipe):
             return ans
         def economist_parse_index(self, soup):
-            script_tag = soup.find("script", id="__NEXT_DATA__")
+            script_tag = soup.find('script', id='__NEXT_DATA__')
             if script_tag is not None:
                 data = json.loads(script_tag.string)
                 # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
-                self.title = safe_dict(data, "props", "pageProps", "content", "headline")
+                self.title = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
                 # self.cover_url = 'https://mma.prnewswire.com/media/2275620/The_Economist_The_World_Ahead_2024.jpg?w=600'
                 feeds = []
-                for coll in safe_dict(data, "props", "pageProps", "content", "collections"):
-                    section = safe_dict(coll, "headline") or ''
+                for coll in safe_dict(data, 'props', 'pageProps', 'content', 'collections'):
+                    section = safe_dict(coll, 'headline') or ''
                     self.log(section)
                     articles = []
-                    for part in safe_dict(coll, "hasPart", "parts"):
-                        title = safe_dict(part, "headline") or ''
-                        url = safe_dict(part, "url", "canonical") or ''
+                    for part in safe_dict(coll, 'hasPart', 'parts'):
+                        title = safe_dict(part, 'headline') or ''
+                        url = safe_dict(part, 'url', 'canonical') or ''
                         if not title or not url:
                             continue
-                        desc = safe_dict(part, "description") or ''
-                        sub = safe_dict(part, "subheadline") or ''
+                        desc = safe_dict(part, 'description') or ''
+                        sub = safe_dict(part, 'subheadline') or ''
                         if sub:
                             desc = sub + ' :: ' + desc
                         self.log('\t', title, '\n\t', desc, '\n\t\t', url)
@@ -341,7 +342,6 @@ class Economist(BasicNewsRecipe):
     # }}}
-
     def preprocess_raw_html(self, raw, url):
         # open('/t/raw.html', 'wb').write(raw.encode('utf-8'))
         if use_archive:
@@ -358,9 +358,9 @@ class Economist(BasicNewsRecipe):
             cleanup_html_article(root)
         if '/interactive/' in url:
-            return '<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' \
-                + 'This article is supposed to be read in a browser' \
-                + '</em></article></body></html>'
+            return ('<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>'
+                    'This article is supposed to be read in a browser.'
+                    '</em></article></body></html>')
         for div in root.xpath('//div[@class="lazy-image"]'):
             noscript = list(div.iter('noscript'))

View File

@@ -36,22 +36,21 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
     remove_empty_feeds = True
     keep_only_tags = [
-        dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']})  # noqa
+        dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']})  # noqa: E501
     ]
     remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']})
-    remove_tags_after = dict(
-        name='div', attrs={'class': ['related-news', 'col']})
+    remove_tags_after = dict(name='div', attrs={'class': ['related-news', 'col']})
     remove_tags = [
-        dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict( name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']})  # noqa
+        dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict(name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']})  # noqa: E501
     ]
-    extra_css = """
+    extra_css = '''
         p{text-align: justify; font-size: 100%}
         body{ text-align: left; font-size:100% }
         h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
-    """
+    '''
     preprocess_regexps = [(re.compile(
         r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]

View File

@@ -9,8 +9,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 def classes(classes):
     q = frozenset(classes.split(' '))
-    return dict(attrs={
-        'class': lambda x: x and frozenset(x.split()).intersection(q)})
+    return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
 class Minutes(BasicNewsRecipe):

View File

@@ -28,7 +28,7 @@ class DrawAndCook(BasicNewsRecipe):
     def parse_index(self):
         feeds = []
         for title, url in [
-            ("They Draw and Cook", "http://www.theydrawandcook.com/")
+            ('They Draw and Cook', 'http://www.theydrawandcook.com/')
         ]:
             articles = self.make_links(url)
             if articles:

View File

@@ -5,11 +5,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class TheMITPressReader(BasicNewsRecipe):
-    title = "The MIT Press Reader"
+    title = 'The MIT Press Reader'
     __author__ = 'yodha8'
     language = 'en'
-    description = ("Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors."
-                   " This recipe pulls articles from the past 7 days.")
+    description = ('Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors.'
+                   ' This recipe pulls articles from the past 7 days.')
     oldest_article = 7
     max_articles_per_feed = 100
     auto_cleanup = True

View File

@@ -38,9 +38,9 @@ class ABCNews(BasicNewsRecipe):
     if d and isinstance(d, str):
         self.oldest_article = float(d)
     # auto_cleanup = True  # enable this as a backup option if recipe stops working
     # use_embedded_content = False  # if set to true will assume that all the article content is within the feed (i.e. won't try to fetch more data)
     no_stylesheets = True
     remove_javascript = True
@@ -57,7 +57,7 @@ class ABCNews(BasicNewsRecipe):
     # Clear out all the unwanted html tags:
     # ************************************
     remove_tags = [
         # dict(name='aside', attrs={'name': re.compile(aside_reg_exp, re.IGNORECASE)})
         {
             'name': ['meta', 'link', 'noscript', 'aside']
         },
@@ -98,12 +98,12 @@ class ABCNews(BasicNewsRecipe):
         ('Health', 'https://www.abc.net.au/news/feed/9167762/rss.xml'),
         ('Arts and Entertainment', 'https://www.abc.net.au/news/feed/472/rss.xml'),
         ('Fact Check', 'https://www.abc.net.au/news/feed/5306468/rss.xml'),
         # ('Adelaide', 'https://www.abc.net.au/news/feed/8057540/rss.xml'),  # enable by removing # at start of line
         # ('Brisbane', 'https://www.abc.net.au/news/feed/8053540/rss.xml'),  # enable by removing # at start of line
         # ('Canberra', 'https://www.abc.net.au/news/feed/8057234/rss.xml'),  # enable by removing # at start of line
         # ('Darwin', 'https://www.abc.net.au/news/feed/8057648/rss.xml'),  # enable by removing # at start of line
         # ('Hobart', 'https://www.abc.net.au/news/feed/8054562/rss.xml'),  # enable by removing # at start of line
         # ('Melbourne', 'https://www.abc.net.au/news/feed/8057136/rss.xml'),  # enable by removing # at start of line
         # ('Perth', 'https://www.abc.net.au/news/feed/8057096/rss.xml'),  # enable by removing # at start of line
         # ('Sydney', 'https://www.abc.net.au/news/feed/8055316/rss.xml'),  # enable by removing # at start of line
     ]

View File

@@ -47,13 +45,13 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
     if d and isinstance(d, str):
         self.oldest_article = float(d)
-    extra_css = """
+    extra_css = '''
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h3{font-family: sans-serif; font-size:120%; font-weight:bold; text-align: justify; }
        h2{font-family: sans-serif; font-size:100%; font-weight:bold; text-align: justify; }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
-    """
+    '''
     feeds = [

View File

@@ -28,6 +28,6 @@ class Acrimed(BasicNewsRecipe):
         lambda m: '<title>' + m.group(1) + '</title>'),
         (re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]
-    extra_css = """
+    extra_css = '''
         .chapo{font-style:italic; margin: 1em 0 0.5em}
-    """
+    '''

View File

@@ -85,9 +85,10 @@ class ADRecipe(BasicNewsRecipe):
     def print_version(self, url):
         parts = url.split('/')
-        print_url = 'http://' + parts[2] + '/' + parts[3] + '/' + parts[4] + '/' + parts[5] + '/' \
-            + parts[10] + '/' + parts[7] + '/print/' + \
-            parts[8] + '/' + parts[9] + '/' + parts[13]
+        print_url = 'http://' + '/'.join([
+            parts[2], parts[3], parts[4], parts[5], parts[10],
+            parts[7], 'print', parts[8], parts[9], parts[13],
+        ])
         return print_url

View File

@@ -33,7 +33,7 @@ class Adevarul(BasicNewsRecipe):
     ]
     remove_tags = [
-        dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'})  # noqa
+        dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'})  # noqa: E501
     ]
     remove_tags_after = [

View File

@@ -21,7 +21,7 @@ class AdventureGamers(BasicNewsRecipe):
     remove_javascript = True
     use_embedded_content = False
     INDEX = u'http://www.adventuregamers.com'
-    extra_css = """
+    extra_css = '''
        .pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
        .pageheader_title,.page_title{font-size: xx-large; color: #394128}
        .pageheader_byline{font-size: small; font-weight: bold; color: #394128}
@@ -32,7 +32,7 @@ class AdventureGamers(BasicNewsRecipe):
        .score_header{font-size: large; color: #50544A}
        img{margin-bottom: 1em;}
        body{font-family: 'Open Sans',Helvetica,Arial,sans-serif}
-    """
+    '''
     conversion_options = {
         'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@@ -8,13 +8,14 @@ def absurl(url):
     if url.startswith('/'):
         return 'https://www.afr.com' + url

+
 class afr(BasicNewsRecipe):
     title = 'Australian Financial Review'
     __author__ = 'unkn0wn'
     description = (
         'For more than 65 years The Australian Financial Review has been the authority on business,'
         ' finance and investment news in Australia. It has a reputation for independent, award-winning '
-        'journalism and is essential reading for Australia\'s business and investor community.'
+        "journalism and is essential reading for Australia's business and investor community."
     )
     masthead_url = 'https://www.nineforbrands.com.au/wp-content/uploads/2020/08/AFR-DHOSP-Logo-black-RGB.png'
     encoding = 'utf-8'
@@ -24,7 +25,6 @@ class afr(BasicNewsRecipe):
     timefmt = ' [%d %b %Y]'
     max_articles_per_feed = 25
     no_stylesheets = True
-    remove_empty_feeds = True
     remove_attributes = ['style', 'height', 'width']
     keep_only_tags = [
@@ -39,7 +39,7 @@ class afr(BasicNewsRecipe):
         dict(name=['button', 'aside', 'svg']),
     ]
-    remove_tags_after= [ dict(name='aside', attrs={'id':'stickyContainer'})]
+    remove_tags_after= [dict(name='aside', attrs={'id':'stickyContainer'})]
     extra_css = '''
         #img-cap {font-size:small; text-align:center;}

View File

@@ -36,9 +36,9 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
     '''
     def default_cover(self, cover_file):
-        """
+        '''
         Crée une couverture personnalisée avec le logo
-        """
+        '''
         from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
         from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
@@ -54,7 +54,7 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
         weekday = french_weekday[wkd]
         month = french_month[today.month]
-        date_str = f"{weekday} {today.day} {month} {today.year}"
+        date_str = f'{weekday} {today.day} {month} {today.year}'
         edition = today.strftime('Édition de %Hh')
         # Image de base

View File

@@ -6,7 +6,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Agents(BasicNewsRecipe):
     title = u'\u00AB\u0410\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E\u00BB'
-    description = u'\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u0430\u043D\u043D\u043E\u0435 \u0432\u044B\u0445\u043E\u0434\u0446\u0430\u043C\u0438 \u0438\u0437 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432\u0430 \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u043C\u043E\u0433\u043E \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u041F\u0440\u043E\u0435\u043A\u0442\u00BB.'  # noqa
+    description = u'\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u0430\u043D\u043D\u043E\u0435 \u0432\u044B\u0445\u043E\u0434\u0446\u0430\u043C\u0438 \u0438\u0437 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432\u0430 \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u043C\u043E\u0433\u043E \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u041F\u0440\u043E\u0435\u043A\u0442\u00BB.'  # noqa: E501
     __author__ = 'bugmen00t'
     publisher = 'Project Media'
     publication_type = 'news'

View File

@@ -32,7 +32,7 @@ class aktualneRecipe(BasicNewsRecipe):
     remove_attributes = []
     remove_tags_before = dict(name='h1', attrs={'class': ['titulek-clanku']})
     filter_regexps = [r'img.aktualne.centrum.cz']
     remove_tags = [dict(name='div', attrs={'id': ['social-bookmark']}),
                    dict(name='div', attrs={'class': ['box1', 'svazane-tagy']}),
                    dict(name='div', attrs={'class': 'itemcomment id0'}),
                    dict(name='div', attrs={'class': 'hlavicka'}),

View File

@@ -21,9 +21,9 @@ class AlJazeera(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
-    extra_css = """
+    extra_css = '''
        body{font-family: Arial,sans-serif}
-    """
+    '''
     conversion_options = {
         'comment': description, 'tags': category,
         'publisher': publisher, 'language': language
@@ -55,7 +55,7 @@ class AlJazeera(BasicNewsRecipe):
                u'http://www.aljazeera.com/xml/rss/all.xml')]
     def get_article_url(self, article):
         artlurl = article.get('link', None)
         return artlurl
     def preprocess_html(self, soup):

View File

@@ -22,7 +22,7 @@ class AlMasryAlyoum(BasicNewsRecipe):
     category = 'News'
     publication_type = 'newsportal'
-    extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } '  # noqa
+    extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } '  # noqa: E501
     keep_only_tags = [
         dict(name='div', attrs={'class': ['article']})

View File

@@ -110,7 +110,7 @@ class AlMonitor(BasicNewsRecipe):
             title = title[0:120] + '...'
         href = link.get('href')
         if not href:
-            self._p("BAD HREF: " + str(link))
+            self._p('BAD HREF: ' + str(link))
             return
         self.queue_article_link(section, href, title)
@@ -158,7 +158,7 @@ class AlMonitor(BasicNewsRecipe):
         age = (datetime.datetime.now() - date).days
         if (age > self.oldest_article):
-            return "too old"
+            return 'too old'
         return False
     def scrape_article_date(self, soup):
@@ -174,7 +174,7 @@ class AlMonitor(BasicNewsRecipe):
     def date_from_string(self, datestring):
         try:
             # eg: Posted September 17, 2014
-            dt = datetime.datetime.strptime(datestring, "Posted %B %d, %Y")
+            dt = datetime.datetime.strptime(datestring, 'Posted %B %d, %Y')
         except:
             dt = None
@@ -203,11 +203,10 @@ class AlMonitor(BasicNewsRecipe):
         return self.tag_to_string(n).strip()
     def _dbg_soup_node(self, node):
-        s = ' cls: ' + str(node.get('class')).strip() + \
-            ' id: ' + str(node.get('id')).strip() + \
-            ' role: ' + str(node.get('role')).strip() + \
-            ' txt: ' + self.text(node)
-        return s
+        return (' cls: ' + str(node.get('class')).strip() +
+                ' id: ' + str(node.get('id')).strip() +
+                ' role: ' + str(node.get('role')).strip() +
+                ' txt: ' + self.text(node))
     def _p(self, msg):
         curframe = inspect.currentframe()

View File

@@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class AlbertMohlersBlog(BasicNewsRecipe):
-    title = u'Albert Mohler\'s Blog'
+    title = u"Albert Mohler's Blog"
     __author__ = 'Peter Grungi'
     language = 'en'
     oldest_article = 90
@@ -13,8 +13,7 @@ class AlbertMohlersBlog(BasicNewsRecipe):
     auto_cleanup = True
     cover_url = 'http://www.albertmohler.com/wp-content/themes/albert-mohler-v5/img/logo-am-lg.gif'
     publisher = 'Albert Mohler'
-    language = 'en'
     author = 'Albert Mohler'
-    feeds = [(u'Albert Mohler\'s Blog',
+    feeds = [(u"Albert Mohler's Blog",
               u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]

View File

@@ -43,7 +43,7 @@ class ALD(BasicNewsRecipe):
         # Extract a list of dates from the page.
         # Subset this out to the list of target dates for extraction.
         date_list = []
-        for div in soup.findAll('div', attrs={'id': "dayheader"}):
+        for div in soup.findAll('div', attrs={'id': 'dayheader'}):
             date_list.append(self.tag_to_string(div))
         date_list_clean = [re.sub(r'[^\w]', ' ', date) for date in date_list]
         date_list_bool = [
@@ -54,14 +54,14 @@ class ALD(BasicNewsRecipe):
         # Process each paragraph one by one.
         # Stop when the text of the previous div is not in the target date list.
-        for div in soup.findAll('div', attrs={'class': "mobile-front"}):
+        for div in soup.findAll('div', attrs={'class': 'mobile-front'}):
             for p in div.findAll('p'):
                 if self.tag_to_string(p.findPreviousSibling('div')) in compress_date:
                     if p.find('a'):
                         title = self.tag_to_string(p)
                         link = p.find('a')['href']
                         if self.tag_to_string(p.findPreviousSibling('h3')
-                                              ) == "Articles of Note":
+                                              ) == 'Articles of Note':
                             articles_note.append({
                                 'title': title,
                                 'url': link,
@@ -69,7 +69,7 @@ class ALD(BasicNewsRecipe):
                                 'date': ''
                             })
                         elif self.tag_to_string(p.findPreviousSibling('h3')
-                                                ) == "New Books":
+                                                ) == 'New Books':
                             new_books.append({
                                 'title': title,
                                 'url': link,

View File

@ -29,6 +29,6 @@ class AlejaKomiksu(BasicNewsRecipe):
def skip_ad_pages(self, soup): def skip_ad_pages(self, soup):
tag = soup.find(attrs={'class': 'rodzaj'}) tag = soup.find(attrs={'class': 'rodzaj'})
if tag and tag.a.string.lower().strip() == 'recenzje': if tag and tag.a.string.lower().strip() == 'recenzje':
link = soup.find(text=re.compile('recenzuje')) link = soup.find(text=re.compile(r'recenzuje'))
if link: if link:
return self.index_to_soup(link.parent['href'], raw=True) return self.index_to_soup(link.parent['href'], raw=True)
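
The re.compile change is the usual "raw strings for regex patterns" cleanup: 'recenzuje' contains no escapes, so behaviour is unchanged, but the r prefix keeps Python from interpreting backslashes before the regex engine sees them. For example:

import re

# With a plain string you must double every backslash, because '\b'
# alone is the backspace character, not a regex word boundary.
plain = re.compile('\\brecenzuje\\b')
# The raw string reads exactly as the regex engine sees it.
raw = re.compile(r'\brecenzuje\b')

text = 'Autor recenzuje komiks'
assert plain.search(text) and raw.search(text)
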

View File

@ -21,7 +21,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
remove_images = False remove_images = False
def get_cover_url(self): def get_cover_url(self):
"""Récupère dynamiquement l'URL de la dernière une depuis MLP""" '''Récupère dynamiquement l'URL de la dernière une depuis MLP'''
br = self.get_browser() br = self.get_browser()
try: try:
# Accéder à la page du magazine sur MLP # Accéder à la page du magazine sur MLP
@ -38,7 +38,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
self.log('Cover URL found:', cover_url) self.log('Cover URL found:', cover_url)
return cover_url return cover_url
self.log('Aucune couverture trouvée, utilisation de l\'image par défaut') self.log("Aucune couverture trouvée, utilisation de l'image par défaut")
return 'https://www.alternatives-economiques.fr/sites/all/themes/alternatives-economiques-main/assets/logo-alternatives-economiques.svg' return 'https://www.alternatives-economiques.fr/sites/all/themes/alternatives-economiques-main/assets/logo-alternatives-economiques.svg'
except Exception as e: except Exception as e:
@ -92,7 +92,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
display_name = section_name.replace('-', ' ').title() display_name = section_name.replace('-', ' ').title()
articles.append((display_name, feed_articles[:self.max_articles_per_feed])) articles.append((display_name, feed_articles[:self.max_articles_per_feed]))
except Exception as e: except Exception as e:
self.log.error(f'Error processing {section_name}: {str(e)}') self.log.error(f'Error processing {section_name}: {e}')
continue continue
return articles return articles
@ -133,7 +133,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
'description': '' 'description': ''
}) })
except Exception as e: except Exception as e:
self.log.error(f'Error getting H1 title for {article_url}: {str(e)}') self.log.error(f'Error getting H1 title for {article_url}: {e}')
continue continue
return feed_articles return feed_articles
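
The logging edits in this recipe drop the redundant str() inside f-strings (ruff flags the explicit conversion as RUF010): interpolation already formats the value with str(), so wrapping the exception is a no-op. A quick check:

try:
    raise ValueError('boom')
except Exception as e:
    # f-string interpolation formats via str() already,
    # so str(e) is a no-op wrapper.
    assert f'Error processing: {str(e)}' == f'Error processing: {e}'
    print(f'Error processing: {e}')
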

View File

@ -21,11 +21,9 @@ class WwwAltomdata_dk(BasicNewsRecipe):
resolve_internal_links = True resolve_internal_links = True
remove_empty_feeds = True remove_empty_feeds = True
auto_cleanup = True auto_cleanup = True
language = 'da'
feeds = [ feeds = [
('Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/feed'), ('Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/feed'),
('Kommentarer til Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/comments/feed'), ('Kommentarer til Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/comments/feed'),
] ]

View File

@ -34,7 +34,7 @@ class AM730(BasicNewsRecipe):
description = 'http://www.am730.com.hk' description = 'http://www.am730.com.hk'
category = 'Chinese, News, Hong Kong' category = 'Chinese, News, Hong Kong'
masthead_url = 'https://upload.wikimedia.org/wikipedia/en/5/58/Am730_Hong_Kong_newspaper_logo.png' masthead_url = 'https://upload.wikimedia.org/wikipedia/en/5/58/Am730_Hong_Kong_newspaper_logo.png'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa: E501
remove_tags =[dict(name='div',attrs={'class':'col-xs-12 col-sm-1 col-md-1 share-button'}), remove_tags =[dict(name='div',attrs={'class':'col-xs-12 col-sm-1 col-md-1 share-button'}),
dict(name='div',attrs={'class':'logo-container print-logo'}), dict(name='div',attrs={'class':'logo-container print-logo'}),
dict(name='div',attrs={'id':'galleria'})] dict(name='div',attrs={'id':'galleria'})]
@ -53,12 +53,12 @@ class AM730(BasicNewsRecipe):
return self.masthead_url return self.masthead_url
def getAMSectionArticles(self, sectionName,url): def getAMSectionArticles(self, sectionName,url):
# print sectionName # print(sectionName)
soup = self.index_to_soup(url) soup = self.index_to_soup(url)
articles = [] articles = []
for aTag in soup.findAll('a',attrs={'class':'newsimglink'}): for aTag in soup.findAll('a',attrs={'class':'newsimglink'}):
href = aTag.get('href',False) href = aTag.get('href',False)
if not href.encode("utf-8").startswith(url.encode("utf-8")) : if not href.encode('utf-8').startswith(url.encode('utf-8')):
continue # not in same section continue # not in same section
title = href.split('/')[-1].split('-')[0] title = href.split('/')[-1].split('-')[0]
@ -67,7 +67,7 @@ class AM730(BasicNewsRecipe):
print(title) print(title)
try: try:
if articles.index({'title':title,'url':href})>=0: if articles.index({'title':title,'url':href})>=0:
# print 'already added' # print('already added')
continue # already added continue # already added
except: except:
pass pass
@ -78,7 +78,7 @@ class AM730(BasicNewsRecipe):
break break
if self.debug: if self.debug:
print(articles) print(articles)
return (sectionName,articles) return sectionName, articles
def parse_index(self): def parse_index(self):
# hard code sections # hard code sections
@ -89,12 +89,12 @@ class AM730(BasicNewsRecipe):
('體育','https://www.am730.com.hk/news/%E9%AB%94%E8%82%B2'), ('體育','https://www.am730.com.hk/news/%E9%AB%94%E8%82%B2'),
('娛樂','https://www.am730.com.hk/news/%E5%A8%9B%E6%A8%82'), ('娛樂','https://www.am730.com.hk/news/%E5%A8%9B%E6%A8%82'),
('旅遊.飲食','https://www.am730.com.hk/news/%E6%97%85%E9%81%8A.%E9%A3%B2%E9%A3%9F') ('旅遊.飲食','https://www.am730.com.hk/news/%E6%97%85%E9%81%8A.%E9%A3%B2%E9%A3%9F')
] # articles =[] ] # articles =[]
SectionsArticles=[] SectionsArticles=[]
for (title, url) in Sections: for title, url in Sections:
if self.debug: if self.debug:
print(title) print(title)
print(url) print(url)
SectionsArticles.append(self.getAMSectionArticles(title,url)) SectionsArticles.append(self.getAMSectionArticles(title,url))
# feeds.append(articles[0]['url']) # feeds.append(articles[0]['url'])
return SectionsArticles return SectionsArticles
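
The # noqa edits across these recipes pin the suppressed rule: a bare # noqa silences every check on its line (what pygrep-hooks reports as PGH004, if that rule is enabled), while # noqa: E501 waives only the line-length limit. For instance, with a hypothetical overlong constant:

# Only E501 (line too long) is waived here; any other problem on the
# same line would still be reported by the linter.
URL = 'https://www.am730.com.hk/news/some-extremely-long-article-slug-that-overflows-the-configured-line-length-limit'  # noqa: E501
print(URL)
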

View File

@ -28,9 +28,9 @@ class Ambito(BasicNewsRecipe):
language = 'es_AR' language = 'es_AR'
publication_type = 'newsportal' publication_type = 'newsportal'
masthead_url = 'https://www.ambito.com/css-custom/239/images/logo-239-2020v2.svg' masthead_url = 'https://www.ambito.com/css-custom/239/images/logo-239-2020v2.svg'
extra_css = """ extra_css = '''
body{font-family: Roboto, sans-serif} body{font-family: Roboto, sans-serif}
""" '''
conversion_options = { conversion_options = {
'comment': description, 'comment': description,

View File

@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AmericanThinker(BasicNewsRecipe): class AmericanThinker(BasicNewsRecipe):
title = u'American Thinker' title = u'American Thinker'
description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans." description = 'American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans.'
__author__ = 'Walt Anthony' __author__ = 'Walt Anthony'
publisher = 'Thomas Lifson' publisher = 'Thomas Lifson'
category = 'news, politics, USA' category = 'news, politics, USA'
@ -33,7 +33,7 @@ class AmericanThinker(BasicNewsRecipe):
root = html5lib.parse( root = html5lib.parse(
clean_xml_chars(raw), treebuilder='lxml', clean_xml_chars(raw), treebuilder='lxml',
namespaceHTMLElements=False) namespaceHTMLElements=False)
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa: E501
x.getparent().remove(x) x.getparent().remove(x)
return etree.tostring(root, encoding='unicode') return etree.tostring(root, encoding='unicode')

View File

@ -39,4 +39,4 @@ class anan(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
# return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update # return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update
return url.replace("/show/", "/print/") # 2014-02-27 AGE: update return url.replace('/show/', '/print/') # 2014-02-27 AGE: update

View File

@ -12,7 +12,7 @@ class ancientegypt(BasicNewsRecipe):
language = 'en' language = 'en'
__author__ = 'unkn0wn' __author__ = 'unkn0wn'
description = ( description = (
'Ancient Egypt is the world\'s leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. ' "Ancient Egypt is the world's leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. "
'Now in a larger format with a fresh new design, AE brings you the latest news and discoveries, and feature articles covering ' 'Now in a larger format with a fresh new design, AE brings you the latest news and discoveries, and feature articles covering '
'more than 5000 years of Egyptian history. Published bimonthly.' 'more than 5000 years of Egyptian history. Published bimonthly.'
) )

View File

@ -5,7 +5,7 @@ from datetime import date
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
# figure out your local edition id from the log of this recipe # figure out your local edition id from the log of this recipe
edi_id = 182 # NTR VIJAYAWADA - 182 edi_id = 182 # NTR VIJAYAWADA - 182
today = date.today().strftime('%d/%m/%Y') today = date.today().strftime('%d/%m/%Y')
@ -18,6 +18,7 @@ today = today.replace('/', '%2F')
index = 'https://epaper.andhrajyothy.com' index = 'https://epaper.andhrajyothy.com'
class andhra(BasicNewsRecipe): class andhra(BasicNewsRecipe):
title = 'ఆంధ్రజ్యోతి - ఆంధ్రప్రదేశ్' title = 'ఆంధ్రజ్యోతి - ఆంధ్రప్రదేశ్'
language = 'te' language = 'te'
@ -50,7 +51,7 @@ class andhra(BasicNewsRecipe):
self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n') self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n')
for edi in edi_data: for edi in edi_data:
if edi['org_location'] in {'Magazines', 'Navya Daily'}: if edi['org_location'] in {'Magazines', 'Navya Daily'}:
continue continue
self.log(edi['org_location']) self.log(edi['org_location'])
cities = [] cities = []
for edi_loc in edi['editionlocation']: for edi_loc in edi['editionlocation']:
@ -75,8 +76,8 @@ class andhra(BasicNewsRecipe):
url = str(snaps['OrgId']) url = str(snaps['OrgId'])
if snaps['ObjectType'] == 4: if snaps['ObjectType'] == 4:
continue continue
feeds_dict[section].append({"title": '', "url": url}) feeds_dict[section].append({'title': '', 'url': url})
return [(section, articles) for section, articles in feeds_dict.items()] return list(feeds_dict.items())
def preprocess_raw_html(self, raw, *a): def preprocess_raw_html(self, raw, *a):
data = json.loads(raw) data = json.loads(raw)
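
return [(section, articles) for section, articles in feeds_dict.items()] rebuilds the tuples it already has, so ruff's C416 (unnecessary comprehension) rewrites it to list(feeds_dict.items()). The two are equivalent for the feed list this recipe returns; with placeholder data:

from collections import defaultdict

feeds_dict = defaultdict(list)
feeds_dict['Front Page'].append({'title': '', 'url': '101'})
feeds_dict['Sports'].append({'title': '', 'url': '202'})

# The comprehension copies each (section, articles) pair unchanged...
by_hand = [(section, articles) for section, articles in feeds_dict.items()]
# ...so list() over items() produces the identical list.
assert by_hand == list(feeds_dict.items())
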

View File

@ -5,7 +5,7 @@ from datetime import date
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
# figure out your local edition id from the log of this recipe # figure out your local edition id from the log of this recipe
edi_id = 34 # HYDERABAD MAIN I - 34 edi_id = 34 # HYDERABAD MAIN I - 34
today = date.today().strftime('%d/%m/%Y') today = date.today().strftime('%d/%m/%Y')
@ -18,6 +18,7 @@ today = today.replace('/', '%2F')
index = 'https://epaper.andhrajyothy.com' index = 'https://epaper.andhrajyothy.com'
class andhra(BasicNewsRecipe): class andhra(BasicNewsRecipe):
title = 'ఆంధ్రజ్యోతి - తెలంగాణ' title = 'ఆంధ్రజ్యోతి - తెలంగాణ'
language = 'te' language = 'te'
@ -50,7 +51,7 @@ class andhra(BasicNewsRecipe):
self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n') self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n')
for edi in edi_data: for edi in edi_data:
if edi['org_location'] in {'Magazines', 'Navya Daily'}: if edi['org_location'] in {'Magazines', 'Navya Daily'}:
continue continue
self.log(edi['org_location']) self.log(edi['org_location'])
cities = [] cities = []
for edi_loc in edi['editionlocation']: for edi_loc in edi['editionlocation']:
@ -75,8 +76,8 @@ class andhra(BasicNewsRecipe):
url = str(snaps['OrgId']) url = str(snaps['OrgId'])
if snaps['ObjectType'] == 4: if snaps['ObjectType'] == 4:
continue continue
feeds_dict[section].append({"title": '', "url": url}) feeds_dict[section].append({'title': '', 'url': url})
return [(section, articles) for section, articles in feeds_dict.items()] return list(feeds_dict.items())
def preprocess_raw_html(self, raw, *a): def preprocess_raw_html(self, raw, *a):
data = json.loads(raw) data = json.loads(raw)

View File

@ -16,5 +16,5 @@ class Android_com_pl(BasicNewsRecipe):
remove_tags_after = [{'class': 'post-content'}] remove_tags_after = [{'class': 'post-content'}]
remove_tags = [dict(name='ul', attrs={'class': 'tags small-tags'}), dict(name='a', attrs={'onclick': 'return ss_plugin_loadpopup_js(this);'})] remove_tags = [dict(name='ul', attrs={'class': 'tags small-tags'}), dict(name='a', attrs={'onclick': 'return ss_plugin_loadpopup_js(this);'})]
preprocess_regexps = [ preprocess_regexps = [
(re.compile(u'<p>.{,1}</p>', re.DOTALL), lambda match: '')] (re.compile(r'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
feeds = [(u'Android', u'http://android.com.pl/feed/')] feeds = [(u'Android', u'http://android.com.pl/feed/')]

View File

@ -23,36 +23,36 @@ class AdvancedUserRecipe1718384518(BasicNewsRecipe):
auto_cleanup = True auto_cleanup = True
feeds = [ feeds = [
#Phones # Phones
('Phones', 'https://www.androidpolice.com/feed/phones/'), ('Phones', 'https://www.androidpolice.com/feed/phones/'),
('News about Phones', 'https://www.androidpolice.com/feed/phones-news/'), ('News about Phones', 'https://www.androidpolice.com/feed/phones-news/'),
('Guides about Phones', 'https://www.androidpolice.com/feed/phones-guide/'), ('Guides about Phones', 'https://www.androidpolice.com/feed/phones-guide/'),
('Phones Features', 'https://www.androidpolice.com/feed/phones-features/'), ('Phones Features', 'https://www.androidpolice.com/feed/phones-features/'),
('Phones & Accessory Reviews', 'https://www.androidpolice.com/feed/phones-reviews/'), ('Phones & Accessory Reviews', 'https://www.androidpolice.com/feed/phones-reviews/'),
#Google # Google
('Google', 'https://www.androidpolice.com/feed/google/'), ('Google', 'https://www.androidpolice.com/feed/google/'),
('News about Google', 'https://www.androidpolice.com/feed/news-google/'), ('News about Google', 'https://www.androidpolice.com/feed/news-google/'),
('Google Applications', 'https://www.androidpolice.com/feed/tag/google-app/'), ('Google Applications', 'https://www.androidpolice.com/feed/tag/google-app/'),
('Guides about Google', 'https://www.androidpolice.com/feed/guides-google/'), ('Guides about Google', 'https://www.androidpolice.com/feed/guides-google/'),
('Features about Google', 'https://www.androidpolice.com/feed/features-google/'), ('Features about Google', 'https://www.androidpolice.com/feed/features-google/'),
#Operating Systems # Operating Systems
('Operating Systems', 'https://www.androidpolice.com/feed/operating-systems/'), ('Operating Systems', 'https://www.androidpolice.com/feed/operating-systems/'),
('News about Operating Systems', 'https://www.androidpolice.com/feed/news-operating-systems/'), ('News about Operating Systems', 'https://www.androidpolice.com/feed/news-operating-systems/'),
('Guides about Operating Systems', 'https://www.androidpolice.com/feed/guides-operating-systems/'), ('Guides about Operating Systems', 'https://www.androidpolice.com/feed/guides-operating-systems/'),
('Features on Operating Systems', 'https://www.androidpolice.com/feed/features-operating-systems/'), ('Features on Operating Systems', 'https://www.androidpolice.com/feed/features-operating-systems/'),
#Chromebooks # Chromebooks
('Chromebooks', 'https://www.androidpolice.com/feed/laptops/'), ('Chromebooks', 'https://www.androidpolice.com/feed/laptops/'),
('News about Chromebooks', 'https://www.androidpolice.com/feed/news-chromebooks/'), ('News about Chromebooks', 'https://www.androidpolice.com/feed/news-chromebooks/'),
('Guides about Chromebooks', 'https://www.androidpolice.com/feed/guides-chromebooks/'), ('Guides about Chromebooks', 'https://www.androidpolice.com/feed/guides-chromebooks/'),
('Chromebook & Laptop Reviews', 'https://www.androidpolice.com/feed/reviews-chromebooks/'), ('Chromebook & Laptop Reviews', 'https://www.androidpolice.com/feed/reviews-chromebooks/'),
#Gadgets # Gadgets
('Gadgets', 'https://www.androidpolice.com/feed/gadgets/'), ('Gadgets', 'https://www.androidpolice.com/feed/gadgets/'),
('Smartwatches & Wearables', 'https://www.androidpolice.com/feed/wearables/'), ('Smartwatches & Wearables', 'https://www.androidpolice.com/feed/wearables/'),
('Audio', 'https://www.androidpolice.com/feed/tag/audio/'), ('Audio', 'https://www.androidpolice.com/feed/tag/audio/'),
('Accessories', 'https://www.androidpolice.com/feed/accessories/'), ('Accessories', 'https://www.androidpolice.com/feed/accessories/'),
('Smart Home', 'https://www.androidpolice.com/feed/smart-home/'), ('Smart Home', 'https://www.androidpolice.com/feed/smart-home/'),
('Applications & Games', 'https://www.androidpolice.com/feed/applications-games/'), ('Applications & Games', 'https://www.androidpolice.com/feed/applications-games/'),
#Reviews # Reviews
('Reviews', 'https://www.androidpolice.com/feed/reviews/'), ('Reviews', 'https://www.androidpolice.com/feed/reviews/'),
('Phones & Accessory Reviews', 'https://www.androidpolice.com/feed/phones-reviews/'), ('Phones & Accessory Reviews', 'https://www.androidpolice.com/feed/phones-reviews/'),
('Smartwatch & Wearable Reviews', 'https://www.androidpolice.com/feed/wearable-reviews/'), ('Smartwatch & Wearable Reviews', 'https://www.androidpolice.com/feed/wearable-reviews/'),

View File

@ -32,14 +32,11 @@ class AnimalPolitico(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
soup = self.index_to_soup('http://www.animalpolitico.com/') soup = self.index_to_soup('http://www.animalpolitico.com/')
articles = [] articles = []
for a in soup(**{ for a in soup(name='a', attrs={
'name': 'a',
'attrs': {
'href': True, 'title': True, 'href': True, 'title': True,
'data-author': True, 'data-type': True, 'data-author': True, 'data-type': True,
'data-home-title': True 'data-home-title': True
} }):
}):
title = a['title'] title = a['title']
url = a['href'] url = a['href']
author = a['data-author'] author = a['data-author']
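
Unpacking a dict literal straight into a call, as the old AnimalPolitico code did, is unnecessary-dict-kwargs (PIE804 in ruff's flake8-pie rules, assuming it is enabled): every key is a valid keyword, so the call can be written directly. A sketch with a hypothetical find_all stand-in for BeautifulSoup's calling convention:

def find_all(name=None, attrs=None):
    # Stand-in only; returns its arguments so the two calls can be compared.
    return (name, attrs)

# Before: dict literal unpacked into keyword arguments.
old = find_all(**{'name': 'a', 'attrs': {'href': True}})
# After: the same call written as ordinary kwargs.
new = find_all(name='a', attrs={'href': True})
assert old == new
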

View File

@ -19,8 +19,6 @@ class AmericanProspect(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
no_stylesheets = True
keep_only_tags = [ keep_only_tags = [
dict(id=['title', 'content']), dict(id=['title', 'content']),
] ]

View File

@ -18,8 +18,6 @@ class Arbetaren_SE(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
language = 'sv' language = 'sv'
auto_cleanup = True auto_cleanup = True
auto_cleanup_keep = '//div[@class="thumbnail"]' auto_cleanup_keep = '//div[@class="thumbnail"]|//div[@id="article-image"]|//span[@class="important"]'
auto_cleanup_keep = '//div[@id="article-image"]'
auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]'
feeds = [(u'Nyheter', u'https://www.arbetaren.se/feed')] feeds = [(u'Nyheter', u'https://www.arbetaren.se/feed')]
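
The Arbetaren_SE change above is behavioural, not cosmetic: three consecutive assignments to auto_cleanup_keep meant only the last survived, leaving the first two dead, so the merged version joins all three XPath expressions with the union operator '|'. The overwrite is plain Python class-body semantics:

class Recipe:
    # Each assignment rebinds the name; the first two are dead code.
    auto_cleanup_keep = '//div[@class="thumbnail"]'
    auto_cleanup_keep = '//div[@id="article-image"]'
    auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]'

# Only the final expression is left on the class.
print(Recipe.auto_cleanup_keep)
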

View File

@ -66,19 +66,19 @@ class Arcamax(BasicNewsRecipe):
# (u"9 Chickweed Lane", u"https://www.arcamax.com/thefunnies/ninechickweedlane"), # (u"9 Chickweed Lane", u"https://www.arcamax.com/thefunnies/ninechickweedlane"),
# (u"Agnes", u"https://www.arcamax.com/thefunnies/agnes"), # (u"Agnes", u"https://www.arcamax.com/thefunnies/agnes"),
# (u"Andy Capp", u"https://www.arcamax.com/thefunnies/andycapp"), # (u"Andy Capp", u"https://www.arcamax.com/thefunnies/andycapp"),
(u"BC", u"https://www.arcamax.com/thefunnies/bc"), (u'BC', u'https://www.arcamax.com/thefunnies/bc'),
# (u"Baby Blues", u"https://www.arcamax.com/thefunnies/babyblues"), # (u"Baby Blues", u"https://www.arcamax.com/thefunnies/babyblues"),
# (u"Beetle Bailey", u"https://www.arcamax.com/thefunnies/beetlebailey"), # (u"Beetle Bailey", u"https://www.arcamax.com/thefunnies/beetlebailey"),
(u"Blondie", u"https://www.arcamax.com/thefunnies/blondie"), (u'Blondie', u'https://www.arcamax.com/thefunnies/blondie'),
# u"Boondocks", u"https://www.arcamax.com/thefunnies/boondocks"), # u"Boondocks", u"https://www.arcamax.com/thefunnies/boondocks"),
# (u"Cathy", u"https://www.arcamax.com/thefunnies/cathy"), # (u"Cathy", u"https://www.arcamax.com/thefunnies/cathy"),
# (u"Daddys Home", u"https://www.arcamax.com/thefunnies/daddyshome"), # (u"Daddys Home", u"https://www.arcamax.com/thefunnies/daddyshome"),
# (u"Dinette Set", u"https://www.arcamax.com/thefunnies/thedinetteset"), # (u"Dinette Set", u"https://www.arcamax.com/thefunnies/thedinetteset"),
(u"Dog Eat Doug", u"https://www.arcamax.com/thefunnies/dogeatdoug"), (u'Dog Eat Doug', u'https://www.arcamax.com/thefunnies/dogeatdoug'),
# (u"Doonesbury", u"https://www.arcamax.com/thefunnies/doonesbury"), # (u"Doonesbury", u"https://www.arcamax.com/thefunnies/doonesbury"),
# (u"Dustin", u"https://www.arcamax.com/thefunnies/dustin"), # (u"Dustin", u"https://www.arcamax.com/thefunnies/dustin"),
(u"Family Circus", u"https://www.arcamax.com/thefunnies/familycircus"), (u'Family Circus', u'https://www.arcamax.com/thefunnies/familycircus'),
(u"Garfield", u"https://www.arcamax.com/thefunnies/garfield"), (u'Garfield', u'https://www.arcamax.com/thefunnies/garfield'),
# (u"Get Fuzzy", u"https://www.arcamax.com/thefunnies/getfuzzy"), # (u"Get Fuzzy", u"https://www.arcamax.com/thefunnies/getfuzzy"),
# (u"Girls and Sports", u"https://www.arcamax.com/thefunnies/girlsandsports"), # (u"Girls and Sports", u"https://www.arcamax.com/thefunnies/girlsandsports"),
# (u"Hagar the Horrible", u"https://www.arcamax.com/thefunnies/hagarthehorrible"), # (u"Hagar the Horrible", u"https://www.arcamax.com/thefunnies/hagarthehorrible"),
@ -87,16 +87,16 @@ class Arcamax(BasicNewsRecipe):
# (u"Luann", u"https://www.arcamax.com/thefunnies/luann"), # (u"Luann", u"https://www.arcamax.com/thefunnies/luann"),
# (u"Momma", u"https://www.arcamax.com/thefunnies/momma"), # (u"Momma", u"https://www.arcamax.com/thefunnies/momma"),
# (u"Mother Goose and Grimm", u"https://www.arcamax.com/thefunnies/mothergooseandgrimm"), # (u"Mother Goose and Grimm", u"https://www.arcamax.com/thefunnies/mothergooseandgrimm"),
(u"Mutts", u"https://www.arcamax.com/thefunnies/mutts"), (u'Mutts', u'https://www.arcamax.com/thefunnies/mutts'),
# (u"Non Sequitur", u"https://www.arcamax.com/thefunnies/nonsequitur"), # (u"Non Sequitur", u"https://www.arcamax.com/thefunnies/nonsequitur"),
# (u"Pearls Before Swine", u"https://www.arcamax.com/thefunnies/pearlsbeforeswine"), # (u"Pearls Before Swine", u"https://www.arcamax.com/thefunnies/pearlsbeforeswine"),
# (u"Pickles", u"https://www.arcamax.com/thefunnies/pickles"), # (u"Pickles", u"https://www.arcamax.com/thefunnies/pickles"),
# (u"Red and Rover", u"https://www.arcamax.com/thefunnies/redandrover"), # (u"Red and Rover", u"https://www.arcamax.com/thefunnies/redandrover"),
# (u"Rubes", u"https://www.arcamax.com/thefunnies/rubes"), # (u"Rubes", u"https://www.arcamax.com/thefunnies/rubes"),
# (u"Rugrats", u"https://www.arcamax.com/thefunnies/rugrats"), # (u"Rugrats", u"https://www.arcamax.com/thefunnies/rugrats"),
(u"Speed Bump", u"https://www.arcamax.com/thefunnies/speedbump"), (u'Speed Bump', u'https://www.arcamax.com/thefunnies/speedbump'),
(u"Wizard of Id", u"https://www.arcamax.com/thefunnies/wizardofid"), (u'Wizard of Id', u'https://www.arcamax.com/thefunnies/wizardofid'),
(u"Zits", u"https://www.arcamax.com/thefunnies/zits"), (u'Zits', u'https://www.arcamax.com/thefunnies/zits'),
]: ]:
self.log('Finding strips for:', title) self.log('Finding strips for:', title)
articles = self.make_links(url, title) articles = self.make_links(url, title)

View File

@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class ArretSurImages(BasicNewsRecipe): class ArretSurImages(BasicNewsRecipe):
title = 'Arrêt sur Images' title = 'Arrêt sur Images'
description = 'Site français d\'analyse des médias' description = "Site français d'analyse des médias"
language = 'fr' language = 'fr'
encoding = 'utf-8' encoding = 'utf-8'
needs_subscription = True needs_subscription = True
@ -27,9 +27,9 @@ class ArretSurImages(BasicNewsRecipe):
] ]
def default_cover(self, cover_file): def default_cover(self, cover_file):
""" '''
Crée une couverture personnalisée avec le logo ASI Crée une couverture personnalisée avec le logo ASI
""" '''
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
@ -45,7 +45,7 @@ class ArretSurImages(BasicNewsRecipe):
weekday = french_weekday[wkd] weekday = french_weekday[wkd]
month = french_month[today.month] month = french_month[today.month]
date_str = f"{weekday} {today.day} {month} {today.year}" date_str = f'{weekday} {today.day} {month} {today.year}'
edition = today.strftime('Édition de %Hh') edition = today.strftime('Édition de %Hh')
img = QImage(1400, 1920, QImage.Format_RGB888) img = QImage(1400, 1920, QImage.Format_RGB888)
@ -123,9 +123,9 @@ class ArretSurImages(BasicNewsRecipe):
br.addheaders += [('Authorization', f'Bearer {auth_response["access_token"]}')] br.addheaders += [('Authorization', f'Bearer {auth_response["access_token"]}')]
print('Authentification réussie') print('Authentification réussie')
else: else:
print('Échec de l\'authentification - Vérifiez vos identifiants') print("Échec de l'authentification - Vérifiez vos identifiants")
except Exception as e: except Exception as e:
print(f'Erreur lors de l\'authentification: {str(e)}') print(f"Erreur lors de l'authentification: {e}")
return br return br
def get_article_url(self, article): def get_article_url(self, article):
@ -162,7 +162,7 @@ class ArretSurImages(BasicNewsRecipe):
</html> </html>
''' '''
except Exception as e: except Exception as e:
print(f'Erreur preprocessing HTML: {str(e)}') print(f'Erreur preprocessing HTML: {e}')
return raw_html return raw_html
def preprocess_html(self, soup): def preprocess_html(self, soup):
@ -186,11 +186,11 @@ class ArretSurImages(BasicNewsRecipe):
else: else:
tag.replace_with(img_tag) tag.replace_with(img_tag)
except Exception as e: except Exception as e:
print(f'Erreur processing image: {str(e)}') print(f'Erreur processing image: {e}')
tag.decompose() tag.decompose()
else: else:
tag.decompose() tag.decompose()
return soup return soup
except Exception as e: except Exception as e:
print(f'Erreur preprocessing HTML: {str(e)}') print(f'Erreur preprocessing HTML: {e}')
return soup return soup

View File

@ -1,12 +1,12 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__license__ = "GPL v3" __license__ = 'GPL v3'
__copyright__ = "2022, Albert Aparicio Isarn <aaparicio at posteo.net>" __copyright__ = '2022, Albert Aparicio Isarn <aaparicio at posteo.net>'
""" '''
https://www.asahi.com/ajw/ https://www.asahi.com/ajw/
""" '''
from datetime import datetime from datetime import datetime
@ -14,99 +14,99 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AsahiShimbunEnglishNews(BasicNewsRecipe): class AsahiShimbunEnglishNews(BasicNewsRecipe):
title = "The Asahi Shimbun" title = 'The Asahi Shimbun'
__author__ = "Albert Aparicio Isarn" __author__ = 'Albert Aparicio Isarn'
description = ("The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan." description = ('The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan.'
" The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive" ' The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive'
" coverage of cool Japan,focusing on manga, travel and other timely news.") ' coverage of cool Japan,focusing on manga, travel and other timely news.')
publisher = "The Asahi Shimbun Company" publisher = 'The Asahi Shimbun Company'
publication_type = "newspaper" publication_type = 'newspaper'
category = "news, japan" category = 'news, japan'
language = "en_JP" language = 'en_JP'
index = "https://www.asahi.com" index = 'https://www.asahi.com'
masthead_url = "https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png" masthead_url = 'https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png'
oldest_article = 3 oldest_article = 3
max_articles_per_feed = 40 max_articles_per_feed = 40
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
remove_tags_before = {"id": "MainInner"} remove_tags_before = {'id': 'MainInner'}
remove_tags_after = {"class": "ArticleText"} remove_tags_after = {'class': 'ArticleText'}
remove_tags = [{"name": "div", "class": "SnsUtilityArea"}] remove_tags = [{'name': 'div', 'class': 'SnsUtilityArea'}]
def get_whats_new(self): def get_whats_new(self):
soup = self.index_to_soup(self.index + "/ajw/new") soup = self.index_to_soup(self.index + '/ajw/new')
news_section = soup.find("div", attrs={"class": "specialList"}) news_section = soup.find('div', attrs={'class': 'specialList'})
new_news = [] new_news = []
for item in news_section.findAll("li"): for item in news_section.findAll('li'):
title = item.find("p", attrs={"class": "title"}).string title = item.find('p', attrs={'class': 'title'}).string
date_string = item.find("p", attrs={"class": "date"}).next date_string = item.find('p', attrs={'class': 'date'}).next
date = date_string.strip() date = date_string.strip()
url = self.index + item.find("a")["href"] url = self.index + item.find('a')['href']
new_news.append( new_news.append(
{ {
"title": title, 'title': title,
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"), 'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
"url": url, 'url': url,
"description": "", 'description': '',
} }
) )
return new_news return new_news
def get_top6(self, soup): def get_top6(self, soup):
top = soup.find("ul", attrs={"class": "top6"}) top = soup.find('ul', attrs={'class': 'top6'})
top6_news = [] top6_news = []
for item in top.findAll("li"): for item in top.findAll('li'):
title = item.find("p", attrs={"class": "title"}).string title = item.find('p', attrs={'class': 'title'}).string
date_string = item.find("p", attrs={"class": "date"}).next date_string = item.find('p', attrs={'class': 'date'}).next
date = date_string.strip() date = date_string.strip()
url = self.index + item.find("a")["href"] url = self.index + item.find('a')['href']
top6_news.append( top6_news.append(
{ {
"title": title, 'title': title,
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"), 'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
"url": url, 'url': url,
"description": "", 'description': '',
} }
) )
return top6_news return top6_news
def get_section_news(self, soup): def get_section_news(self, soup):
news_grid = soup.find("ul", attrs={"class": "default"}) news_grid = soup.find('ul', attrs={'class': 'default'})
news = [] news = []
for item in news_grid.findAll("li"): for item in news_grid.findAll('li'):
title = item.find("p", attrs={"class": "title"}).string title = item.find('p', attrs={'class': 'title'}).string
date_string = item.find("p", attrs={"class": "date"}).next date_string = item.find('p', attrs={'class': 'date'}).next
date = date_string.strip() date = date_string.strip()
url = self.index + item.find("a")["href"] url = self.index + item.find('a')['href']
news.append( news.append(
{ {
"title": title, 'title': title,
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"), 'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
"url": url, 'url': url,
"description": "", 'description': '',
} }
) )
return news return news
def get_section(self, section): def get_section(self, section):
soup = self.index_to_soup(self.index + "/ajw/" + section) soup = self.index_to_soup(self.index + '/ajw/' + section)
section_news_items = self.get_top6(soup) section_news_items = self.get_top6(soup)
section_news_items.extend(self.get_section_news(soup)) section_news_items.extend(self.get_section_news(soup))
@ -114,26 +114,26 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
return section_news_items return section_news_items
def get_special_section(self, section): def get_special_section(self, section):
soup = self.index_to_soup(self.index + "/ajw/" + section) soup = self.index_to_soup(self.index + '/ajw/' + section)
top = soup.find("div", attrs={"class": "Section"}) top = soup.find('div', attrs={'class': 'Section'})
special_news = [] special_news = []
for item in top.findAll("li"): for item in top.findAll('li'):
item_a = item.find("a") item_a = item.find('a')
text_split = item_a.text.strip().split("\n") text_split = item_a.text.strip().split('\n')
title = text_split[0] title = text_split[0]
description = text_split[1].strip() description = text_split[1].strip()
url = self.index + item_a["href"] url = self.index + item_a['href']
special_news.append( special_news.append(
{ {
"title": title, 'title': title,
"date": "", 'date': '',
"url": url, 'url': url,
"description": description, 'description': description,
} }
) )
@ -144,24 +144,24 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
feeds = [ feeds = [
("What's New", self.get_whats_new()), ("What's New", self.get_whats_new()),
("National Report", self.get_section("national_report")), ('National Report', self.get_section('national_report')),
("Politics", self.get_section("politics")), ('Politics', self.get_section('politics')),
("Business", self.get_section("business")), ('Business', self.get_section('business')),
("Asia & World - China", self.get_section("asia_world/china")), ('Asia & World - China', self.get_section('asia_world/china')),
("Asia & World - Korean Peninsula", self.get_section("asia_world/korean_peninsula")), ('Asia & World - Korean Peninsula', self.get_section('asia_world/korean_peninsula')),
("Asia & World - Around Asia", self.get_section("asia_world/around_asia")), ('Asia & World - Around Asia', self.get_section('asia_world/around_asia')),
("Asia & World - World", self.get_section("asia_world/world")), ('Asia & World - World', self.get_section('asia_world/world')),
("Sci & Tech", self.get_section("sci_tech")), ('Sci & Tech', self.get_section('sci_tech')),
("Culture - Style", self.get_section("culture/style")), ('Culture - Style', self.get_section('culture/style')),
# ("Culture - Cooking", self.get_section("culture/cooking")), # ("Culture - Cooking", self.get_section("culture/cooking")),
("Culture - Movies", self.get_section("culture/movies")), ('Culture - Movies', self.get_section('culture/movies')),
("Culture - Manga & Anime", self.get_section("culture/manga_anime")), ('Culture - Manga & Anime', self.get_section('culture/manga_anime')),
("Travel", self.get_section("travel")), ('Travel', self.get_section('travel')),
("Sports", self.get_section("sports")), ('Sports', self.get_section('sports')),
("Opinion - Editorial", self.get_section("opinion/editorial")), ('Opinion - Editorial', self.get_section('opinion/editorial')),
("Opinion - Vox Populi", self.get_section("opinion/vox")), ('Opinion - Vox Populi', self.get_section('opinion/vox')),
("Opinion - Views", self.get_section("opinion/views")), ('Opinion - Views', self.get_section('opinion/views')),
("Special", self.get_special_section("special")), ('Special', self.get_special_section('special')),
] ]
return feeds return feeds
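
Beyond the bulk quote conversion, this recipe repeats one idiom worth noting: listing dates such as 'September 17, 2014' are reparsed with strptime and reformatted to 'YYYY/MM/DD' for the feed metadata. In isolation:

from datetime import datetime

# '%B %d, %Y' matches the site's 'Month day, Year' listing dates;
# strftime then normalises them for the article metadata.
stamp = datetime.strptime('September 17, 2014', '%B %d, %Y')
print(stamp.strftime('%Y/%m/%d'))  # -> 2014/09/17
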

View File

@ -14,7 +14,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AsianReviewOfBooks(BasicNewsRecipe): class AsianReviewOfBooks(BasicNewsRecipe):
title = 'The Asian Review of Books' title = 'The Asian Review of Books'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books is a must-read publication.' # noqa description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books is a must-read publication.' # noqa: E501
publisher = 'The Asian Review of Books' publisher = 'The Asian Review of Books'
category = 'literature, books, reviews, Asia' category = 'literature, books, reviews, Asia'
oldest_article = 30 oldest_article = 30
@ -26,11 +26,11 @@ class AsianReviewOfBooks(BasicNewsRecipe):
publication_type = 'magazine' publication_type = 'magazine'
auto_cleanup = True auto_cleanup = True
masthead_url = 'https://i2.wp.com/asianreviewofbooks.com/content/wp-content/uploads/2016/09/ARBwidelogo.png' masthead_url = 'https://i2.wp.com/asianreviewofbooks.com/content/wp-content/uploads/2016/09/ARBwidelogo.png'
extra_css = """ extra_css = '''
body{font-family: "Droid Serif", serif} body{font-family: "Droid Serif", serif}
.entry-title {font-family: "Playfair Display", serif} .entry-title {font-family: "Playfair Display", serif}
img {display: block} img {display: block}
""" '''
recipe_specific_options = { recipe_specific_options = {
'days': { 'days': {

View File

@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AstroNEWS(BasicNewsRecipe): class AstroNEWS(BasicNewsRecipe):
title = u'AstroNEWS' title = u'AstroNEWS'
__author__ = 'fenuks' __author__ = 'fenuks'
description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' # noqa description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' # noqa: E501
category = 'astronomy, science' category = 'astronomy, science'
language = 'pl' language = 'pl'
oldest_article = 8 oldest_article = 8

View File

@ -12,7 +12,7 @@ test_article = None
# test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed' # test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed'
# {{{ parse article JSON # parse article JSON {{{
def process_image_block(lines, block): def process_image_block(lines, block):
caption = block.get('captionText') caption = block.get('captionText')
caption_lines = [] caption_lines = []

View File

@ -12,7 +12,7 @@ test_article = None
# test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed' # test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed'
# {{{ parse article JSON # parse article JSON {{{
def process_image_block(lines, block): def process_image_block(lines, block):
caption = block.get('captionText') caption = block.get('captionText')
caption_lines = [] caption_lines = []

View File

@ -11,7 +11,7 @@ class AttacEspanaRecipe (BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2012, Marc Busqué <marc@lamarciana.com>' __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
title = u'attac.es' title = u'attac.es'
description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' # noqa description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' # noqa: E501
url = 'http://www.attac.es' url = 'http://www.attac.es'
language = 'es' language = 'es'
tags = 'contrainformación, información alternativa' tags = 'contrainformación, información alternativa'

View File

@ -24,4 +24,3 @@ class WwwAvisen_dk(BasicNewsRecipe):
feeds = [ feeds = [
('Nyheder fra Avisen.dk', 'http://www.avisen.dk/rss.aspx'), ('Nyheder fra Avisen.dk', 'http://www.avisen.dk/rss.aspx'),
] ]

View File

@ -24,12 +24,12 @@ class BuenosAiresHerald(BasicNewsRecipe):
publication_type = 'newspaper' publication_type = 'newspaper'
masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg' masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg'
INDEX = 'http://www.buenosairesherald.com' INDEX = 'http://www.buenosairesherald.com'
extra_css = """ extra_css = '''
body{font-family: Arial,Helvetica,sans-serif } body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block} img{margin-bottom: 0.4em; display:block}
h1{font-family: Georgia,serif} h1{font-family: Georgia,serif}
#fecha{text-align: right; font-size: small} #fecha{text-align: right; font-size: small}
""" '''
conversion_options = { conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language 'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@ -22,21 +22,21 @@ class AdvancedUserRecipe1718382046(BasicNewsRecipe):
auto_cleanup = True auto_cleanup = True
feeds = [ feeds = [
#Gardening # Gardening
('Gardening', 'https://www.backyardboss.net/feed/category/gardening/'), ('Gardening', 'https://www.backyardboss.net/feed/category/gardening/'),
('Outdoor Gardening', 'https://www.backyardboss.net/feed/category/gardening/outdoor-gardening/'), ('Outdoor Gardening', 'https://www.backyardboss.net/feed/category/gardening/outdoor-gardening/'),
('Indoor Gardening', 'https://www.backyardboss.net/feed/category/gardening/indoor-gardening/'), ('Indoor Gardening', 'https://www.backyardboss.net/feed/category/gardening/indoor-gardening/'),
('Fruits & Vegetables', 'https://www.backyardboss.net/feed/tag/gardening/fruits-and-vegetables/'), ('Fruits & Vegetables', 'https://www.backyardboss.net/feed/tag/gardening/fruits-and-vegetables/'),
('Houseplants', 'https://www.backyardboss.net/feed/category/gardening/houseplants/'), ('Houseplants', 'https://www.backyardboss.net/feed/category/gardening/houseplants/'),
('Plant Care', 'https://www.backyardboss.net/feed/category/gardening/plant-care/'), ('Plant Care', 'https://www.backyardboss.net/feed/category/gardening/plant-care/'),
#Backyard # Backyard
('Backyard', 'https://www.backyardboss.net/feed/category/backyard/'), ('Backyard', 'https://www.backyardboss.net/feed/category/backyard/'),
('Home Improvement', 'https://www.backyardboss.net/feed/category/backyard/home-improvement/'), ('Home Improvement', 'https://www.backyardboss.net/feed/category/backyard/home-improvement/'),
('Lawn Care', 'https://www.backyardboss.net/feed/category/backyard/lawn-care/'), ('Lawn Care', 'https://www.backyardboss.net/feed/category/backyard/lawn-care/'),
('Landscaping', 'https://www.backyardboss.net/feed/category/backyard/landscape-industry/'), ('Landscaping', 'https://www.backyardboss.net/feed/category/backyard/landscape-industry/'),
('Barbecue', 'https://www.backyardboss.net/feed/category/backyard/bbq/'), ('Barbecue', 'https://www.backyardboss.net/feed/category/backyard/bbq/'),
('Reviews', 'https://www.backyardboss.net/feed/category/backyard/reviews/'), ('Reviews', 'https://www.backyardboss.net/feed/category/backyard/reviews/'),
#DIY & Project # DIY & Project
('DIY & Projects', 'https://www.backyardboss.net/feed/category/diy/'), ('DIY & Projects', 'https://www.backyardboss.net/feed/category/diy/'),
('How-To', 'https://www.backyardboss.net/feed/category/diy/how-to/'), ('How-To', 'https://www.backyardboss.net/feed/category/diy/how-to/'),
('Designs & Ideas', 'https://www.backyardboss.net/feed/category/diy/designs-and-ideas/'), ('Designs & Ideas', 'https://www.backyardboss.net/feed/category/diy/designs-and-ideas/'),

View File

@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
class BaikalJournal(BasicNewsRecipe): class BaikalJournal(BasicNewsRecipe):
title = '\u041B\u044E\u0434\u0438 \u0411\u0430\u0439\u043A\u0430\u043B\u0430' title = '\u041B\u044E\u0434\u0438 \u0411\u0430\u0439\u043A\u0430\u043B\u0430'
__author__ = 'bugmen00t' __author__ = 'bugmen00t'
description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u044B\u0439 \u0430\u0432\u0442\u043E\u0440\u0441\u043A\u0438\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0436\u0443\u0440\u043D\u0430\u043B \u043E \u0436\u0438\u0437\u043D\u0438 \u0432 \u0421\u0438\u0431\u0438\u0440\u0438 \u0438 \u0432\u043E\u043A\u0440\u0443\u0433 \u0411\u0430\u0439\u043A\u0430\u043B\u0430, \u043E\u043F\u0438\u0441\u044B\u0432\u0430\u044E\u0449\u0438\u0439 \u0436\u0438\u0437\u043D\u044C \u0432 \u0441\u0438\u0431\u0438\u0440\u0441\u043A\u043E\u0439 \u0433\u043B\u0443\u0431\u0438\u043D\u043A\u0435.' # noqa description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u044B\u0439 \u0430\u0432\u0442\u043E\u0440\u0441\u043A\u0438\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0436\u0443\u0440\u043D\u0430\u043B \u043E \u0436\u0438\u0437\u043D\u0438 \u0432 \u0421\u0438\u0431\u0438\u0440\u0438 \u0438 \u0432\u043E\u043A\u0440\u0443\u0433 \u0411\u0430\u0439\u043A\u0430\u043B\u0430, \u043E\u043F\u0438\u0441\u044B\u0432\u0430\u044E\u0449\u0438\u0439 \u0436\u0438\u0437\u043D\u044C \u0432 \u0441\u0438\u0431\u0438\u0440\u0441\u043A\u043E\u0439 \u0433\u043B\u0443\u0431\u0438\u043D\u043A\u0435.' # noqa: E501
publisher = '\u041E\u043B\u044C\u0433\u0430 \u041C\u0443\u0442\u043E\u0432\u0438\u043D\u0430, \u0415\u043B\u0435\u043D\u0430 \u0422\u0440\u0438\u0444\u043E\u043D\u043E\u0432\u0430' # noqa publisher = '\u041E\u043B\u044C\u0433\u0430 \u041C\u0443\u0442\u043E\u0432\u0438\u043D\u0430, \u0415\u043B\u0435\u043D\u0430 \u0422\u0440\u0438\u0444\u043E\u043D\u043E\u0432\u0430' # noqa: E501
category = 'blog' category = 'blog'
cover_url = u'https://baikal-journal.ru/wp-content/themes/baikal/assets/img/logo-full.svg' cover_url = u'https://baikal-journal.ru/wp-content/themes/baikal/assets/img/logo-full.svg'
language = 'ru' language = 'ru'

View File

@ -16,7 +16,7 @@ class BangkokPostRecipe(BasicNewsRecipe):
title = u'Bangkok Post' title = u'Bangkok Post'
publisher = u'Post Publishing PCL' publisher = u'Post Publishing PCL'
category = u'News' category = u'News'
description = u'The world\'s window to Thailand' description = u"The world's window to Thailand"
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100

View File

@ -26,7 +26,7 @@ class bar(BasicNewsRecipe):
prefixed_classes( prefixed_classes(
'text-story-m_story-tags__ story-footer-module__metype__' 'text-story-m_story-tags__ story-footer-module__metype__'
), ),
dict(name = 'svg') dict(name='svg')
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@ -8,11 +8,11 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes
class barrons(BasicNewsRecipe): class barrons(BasicNewsRecipe):
title = 'Barron\'s Magazine' title = "Barron's Magazine"
__author__ = 'unkn0wn' __author__ = 'unkn0wn'
description = ( description = (
'Barron\'s is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister ' "Barron's is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister "
'publication to The Wall Street Journal, Barron\'s covers U.S. financial information, market developments, and ' "publication to The Wall Street Journal, Barron's covers U.S. financial information, market developments, and "
'relevant statistics.' 'relevant statistics.'
) )
language = 'en_US' language = 'en_US'
@ -82,7 +82,7 @@ class barrons(BasicNewsRecipe):
recipe_specific_options = { recipe_specific_options = {
'date': { 'date': {
'short': 'The date of the edition to download (YYYYMMDD format)', 'short': 'The date of the edition to download (YYYYMMDD format)',
'long': 'For example, 20240722.\nIf it didn\'t work, try again later.' 'long': "For example, 20240722.\nIf it didn't work, try again later."
} }
} }
@ -119,7 +119,7 @@ class barrons(BasicNewsRecipe):
byl = articles.find(**prefixed_classes('BarronsTheme--byline--')) byl = articles.find(**prefixed_classes('BarronsTheme--byline--'))
if byl: if byl:
desc += self.tag_to_string(byl) desc += self.tag_to_string(byl)
ttr = articles.find(**prefixed_classes('BarronsTheme--time-to-read--')) ttr = articles.find(**prefixed_classes('BarronsTheme--time-to-read--'))
if ttr: if ttr:
desc += self.tag_to_string(ttr) desc += self.tag_to_string(ttr)
summ = articles.find(**prefixed_classes('BarronsTheme--summary--')) summ = articles.find(**prefixed_classes('BarronsTheme--summary--'))
@ -127,7 +127,7 @@ class barrons(BasicNewsRecipe):
desc += ' | ' + self.tag_to_string(summ) desc += ' | ' + self.tag_to_string(summ)
self.log('\t', title, ' ', url, '\n\t', desc) self.log('\t', title, ' ', url, '\n\t', desc)
ans[section].append({'title': title, 'url': url, 'description': desc}) ans[section].append({'title': title, 'url': url, 'description': desc})
return [(section, articles) for section, articles in ans.items()] return list(ans.items())
def print_version(self, url): def print_version(self, url):
return url.split('?')[0].replace('/articles/', '/amp/articles/') return url.split('?')[0].replace('/articles/', '/amp/articles/')

View File

@ -135,9 +135,9 @@ class BBCNews(BasicNewsRecipe):
# Select / de-select the feeds you want in your ebook. # Select / de-select the feeds you want in your ebook.
feeds = [ feeds = [
("News Home", "https://feeds.bbci.co.uk/news/rss.xml"), ('News Home', 'https://feeds.bbci.co.uk/news/rss.xml'),
("UK", "https://feeds.bbci.co.uk/news/uk/rss.xml"), ('UK', 'https://feeds.bbci.co.uk/news/uk/rss.xml'),
("World", "https://feeds.bbci.co.uk/news/world/rss.xml"), ('World', 'https://feeds.bbci.co.uk/news/world/rss.xml'),
# ("England", "https://feeds.bbci.co.uk/news/england/rss.xml"), # ("England", "https://feeds.bbci.co.uk/news/england/rss.xml"),
# ("Scotland", "https://feeds.bbci.co.uk/news/scotland/rss.xml"), # ("Scotland", "https://feeds.bbci.co.uk/news/scotland/rss.xml"),
# ("Wales", "https://feeds.bbci.co.uk/news/wales/rss.xml"), # ("Wales", "https://feeds.bbci.co.uk/news/wales/rss.xml"),
@ -147,26 +147,26 @@ class BBCNews(BasicNewsRecipe):
# ("Europe", "https://feeds.bbci.co.uk/news/world/europe/rss.xml"), # ("Europe", "https://feeds.bbci.co.uk/news/world/europe/rss.xml"),
# ("Latin America", "https://feeds.bbci.co.uk/news/world/latin_america/rss.xml"), # ("Latin America", "https://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
# ("Middle East", "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml"), # ("Middle East", "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
("US & Canada", "https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"), ('US & Canada', 'https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml'),
("Politics", "https://feeds.bbci.co.uk/news/politics/rss.xml"), ('Politics', 'https://feeds.bbci.co.uk/news/politics/rss.xml'),
("Science/Environment", ('Science/Environment',
"https://feeds.bbci.co.uk/news/science_and_environment/rss.xml"), 'https://feeds.bbci.co.uk/news/science_and_environment/rss.xml'),
("Technology", "https://feeds.bbci.co.uk/news/technology/rss.xml"), ('Technology', 'https://feeds.bbci.co.uk/news/technology/rss.xml'),
("Magazine", "https://feeds.bbci.co.uk/news/magazine/rss.xml"), ('Magazine', 'https://feeds.bbci.co.uk/news/magazine/rss.xml'),
("Entertainment/Arts", ('Entertainment/Arts',
"https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"), 'https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml'),
# ("Health", "https://feeds.bbci.co.uk/news/health/rss.xml"), # ("Health", "https://feeds.bbci.co.uk/news/health/rss.xml"),
# ("Education/Family", "https://feeds.bbci.co.uk/news/education/rss.xml"), # ("Education/Family", "https://feeds.bbci.co.uk/news/education/rss.xml"),
("Business", "https://feeds.bbci.co.uk/news/business/rss.xml"), ('Business', 'https://feeds.bbci.co.uk/news/business/rss.xml'),
("Special Reports", "https://feeds.bbci.co.uk/news/special_reports/rss.xml"), ('Special Reports', 'https://feeds.bbci.co.uk/news/special_reports/rss.xml'),
("Also in the News", "https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"), ('Also in the News', 'https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml'),
# ("Newsbeat", "https://www.bbc.co.uk/newsbeat/rss.xml"), # ("Newsbeat", "https://www.bbc.co.uk/newsbeat/rss.xml"),
# ("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"), # ("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
# ("Blog: Mark D'Arcy (Parliamentary Correspondent)", "https://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"), # ("Blog: Mark D'Arcy (Parliamentary Correspondent)", "https://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
# ("Blog: Robert Peston (Business Editor)", "https://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"), # ("Blog: Robert Peston (Business Editor)", "https://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
# ("Blog: Stephanie Flanders (Economics Editor)", "https://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"), # ("Blog: Stephanie Flanders (Economics Editor)", "https://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
("Sport Front Page", ('Sport Front Page',
"http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"), 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml'),
# ("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"), # ("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
# ("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"), # ("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
# ("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"), # ("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),
View File
@ -201,24 +201,24 @@ class BBCBrasilRecipe(BasicNewsRecipe):
conversion_options = {'smarten_punctuation': True} conversion_options = {'smarten_punctuation': True}
# Specify extra CSS - overrides ALL other CSS (IE. Added last). # Specify extra CSS - overrides ALL other CSS (IE. Added last).
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \ extra_css = '''body { font-family: verdana, helvetica, sans-serif; }
.introduction, .first { font-weight: bold; } \ .introduction, .first { font-weight: bold; }
.cross-head { font-weight: bold; font-size: 125%; } \ .cross-head { font-weight: bold; font-size: 125%; }
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \ .cap, .caption { display: block; font-size: 80%; font-style: italic; }
.cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \ .cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; }
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \ .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position,
.correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block; \ .correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block;
text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \ text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; }
.story-date, .published, .datestamp { font-size: 80%; } \ .story-date, .published, .datestamp { font-size: 80%; }
table { width: 100%; } \ table { width: 100%; }
td img { display: block; margin: 5px auto; } \ td img { display: block; margin: 5px auto; }
ul { padding-top: 10px; } \ ul { padding-top: 10px; }
ol { padding-top: 10px; } \ ol { padding-top: 10px; }
li { padding-top: 5px; padding-bottom: 5px; } \ li { padding-top: 5px; padding-bottom: 5px; }
h1 { text-align: center; font-size: 175%; font-weight: bold; } \ h1 { text-align: center; font-size: 175%; font-weight: bold; }
h2 { text-align: center; font-size: 150%; font-weight: bold; } \ h2 { text-align: center; font-size: 150%; font-weight: bold; }
h3 { text-align: center; font-size: 125%; font-weight: bold; } \ h3 { text-align: center; font-size: 125%; font-weight: bold; }
h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }' h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'''
# Remove various tag attributes to improve the look of the ebook pages. # Remove various tag attributes to improve the look of the ebook pages.
remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan',
@ -556,19 +556,19 @@ class BBCBrasilRecipe(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
# Handle sports page urls type 01: # Handle sports page urls type 01:
if (url.find("go/rss/-/sport1/") != -1): if (url.find('go/rss/-/sport1/') != -1):
temp_url = url.replace("go/rss/-/", "") temp_url = url.replace('go/rss/-/', '')
# Handle sports page urls type 02: # Handle sports page urls type 02:
elif (url.find("go/rss/int/news/-/sport1/") != -1): elif (url.find('go/rss/int/news/-/sport1/') != -1):
temp_url = url.replace("go/rss/int/news/-/", "") temp_url = url.replace('go/rss/int/news/-/', '')
# Handle regular news page urls: # Handle regular news page urls:
else: else:
temp_url = url.replace("go/rss/int/news/-/", "") temp_url = url.replace('go/rss/int/news/-/', '')
# Always add "?print=true" to the end of the url. # Always add "?print=true" to the end of the url.
print_url = temp_url + "?print=true" print_url = temp_url + '?print=true'
return print_url return print_url
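
Editor's note on the extra_css rewrite in the hunk above: it replaces a backslash-continued string with a triple-quoted one. Both forms build a single Python string, but every continuation line must end in a backslash, and the literal breaks if whitespace sneaks in after one; the triple-quoted form needs no continuation characters at all. A minimal sketch (rules abbreviated, not the recipe's full stylesheet; the two values differ only in insignificant CSS whitespace):

    # Fragile: each line must end in '\'; a stray space after the
    # backslash leaves the string unterminated (SyntaxError).
    extra_css = 'body { font-family: verdana, sans-serif; } \
    h1 { text-align: center; }'

    # Equivalent for CSS purposes, no continuations needed.
    extra_css = '''
    body { font-family: verdana, sans-serif; }
    h1 { text-align: center; }
    '''
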
View File
@ -36,7 +36,7 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
remove_tags_after = dict(name='div', attrs={'class': [' g-w8']}) remove_tags_after = dict(name='div', attrs={'class': [' g-w8']})
remove_tags = [ remove_tags = [
dict(name='ul', attrs={'class': ['document-tools blq-clearfix', 'blq-clearfix']}), dict(name='div', attrs={'class': ['box bx-quote-bubble', 'socialmedia-links', 'list li-carousel', 'list li-plain rolling-news', 'list li-plain', 'box bx-livestats', 'li-tab content', 'list li-relatedlinks', 'list li-relatedinternetlinks']}) # noqa dict(name='ul', attrs={'class': ['document-tools blq-clearfix', 'blq-clearfix']}), dict(name='div', attrs={'class': ['box bx-quote-bubble', 'socialmedia-links', 'list li-carousel', 'list li-plain rolling-news', 'list li-plain', 'box bx-livestats', 'li-tab content', 'list li-relatedlinks', 'list li-relatedinternetlinks']}) # noqa: E501
] ]
feeds = [ feeds = [
View File
@ -134,7 +134,7 @@ if __name__ == '__main__':
class BBC(BasicNewsRecipe): class BBC(BasicNewsRecipe):
title = 'BBC News (fast)' title = 'BBC News (fast)'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal'
description = 'Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.' # noqa description = 'Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.' # noqa: E501
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
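
A pattern repeated throughout this diff: bare # noqa comments become # noqa: E501. A bare noqa suppresses every lint rule on its line; naming the code suppresses only the long-line check, so any new violation later introduced on that line still surfaces. A sketch (the description text is a placeholder, not recipe content):

    # Bare: silences E501 and anything else that ever fires on this line.
    description = 'A deliberately long description used here only as an illustration of line length'  # noqa

    # Scoped: only the line-length rule (E501) is exempted.
    description = 'A deliberately long description used here only as an illustration of line length'  # noqa: E501
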
View File
@ -21,7 +21,7 @@ class BBC(BasicNewsRecipe):
category = 'sport, news, UK, world' category = 'sport, news, UK, world'
language = 'en_GB' language = 'en_GB'
publication_type = 'newsportal' publication_type = 'newsportal'
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa: E501
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')] preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
conversion_options = { conversion_options = {
'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True 'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
View File
@ -1,5 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# vim:fileencoding=utf-8 # vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
@ -9,10 +9,10 @@ from calibre.web.feeds.news import BasicNewsRecipe
class BeforeWeGo(BasicNewsRecipe): class BeforeWeGo(BasicNewsRecipe):
title = 'Before We Go' title = 'Before We Go'
__author__ = 'bugmen00t' __author__ = 'bugmen00t'
description = 'Before We Go Blog is a collective of Fantasy, Sci-Fi and Graphic Novel fans from around the world, passionate about providing awesome, enjoyable reviews for anyone who loves a good book!' # noqa description = 'Before We Go Blog is a collective of Fantasy, Sci-Fi and Graphic Novel fans from around the world, passionate about providing awesome, enjoyable reviews for anyone who loves a good book!' # noqa: E501
publisher = 'BEFOREWEGOBLOG' publisher = 'BEFOREWEGOBLOG'
category = 'blog' category = 'blog'
# cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/09/beth-with-long-hair.jpeg' # cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/09/beth-with-long-hair.jpeg'
cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/01/before-we-go-blog-1.png' cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/01/before-we-go-blog-1.png'
language = 'en' language = 'en'
no_stylesheets = True no_stylesheets = True
@ -24,9 +24,9 @@ class BeforeWeGo(BasicNewsRecipe):
remove_tags_before = dict(name='h1', attrs={'class': 'entry-title'}) remove_tags_before = dict(name='h1', attrs={'class': 'entry-title'})
remove_tags_after = dict(name='div', attrs={'id': 'author-bio'}) remove_tags_after = dict(name='div', attrs={'id': 'author-bio'})
# remove_tags_after = dict(name='article') # remove_tags_after = dict(name='article')
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class': 'nectar-scrolling-text font_size_10vh custom_color has-custom-divider'}), dict(name='div', attrs={'class': 'nectar-scrolling-text font_size_10vh custom_color has-custom-divider'}),
dict(name='span', attrs={'class': 'meta-comment-count'}), dict(name='span', attrs={'class': 'meta-comment-count'}),
dict(name='p', attrs={'id': 'breadcrumbs'}) dict(name='p', attrs={'id': 'breadcrumbs'})
View File
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Bellingcat(BasicNewsRecipe): class Bellingcat(BasicNewsRecipe):
title = 'Bellingcat' title = 'Bellingcat'
__author__ = 'bugmen00t' __author__ = 'bugmen00t'
description = 'Bellingcat is an independent international collective of researchers, investigators and citizen journalists using open source and social media investigation to probe a variety of subjects from Mexican drug lords and crimes against humanity, to tracking the use of chemical weapons and conflicts worldwide. With staff and contributors in more than 20 countries around the world, we operate in a unique field where advanced technology, forensic research, journalism, investigations, transparency and accountability come together.' # noqa description = 'Bellingcat is an independent international collective of researchers, investigators and citizen journalists using open source and social media investigation to probe a variety of subjects from Mexican drug lords and crimes against humanity, to tracking the use of chemical weapons and conflicts worldwide. With staff and contributors in more than 20 countries around the world, we operate in a unique field where advanced technology, forensic research, journalism, investigations, transparency and accountability come together.' # noqa: E501
publisher = 'Stichting Bellingcat' publisher = 'Stichting Bellingcat'
category = 'blog' category = 'blog'
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg' cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
View File
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Bellingcat(BasicNewsRecipe): class Bellingcat(BasicNewsRecipe):
title = 'Bellingcat' title = 'Bellingcat'
__author__ = 'bugmen00t' __author__ = 'bugmen00t'
description = 'Bellingcat es un colectivo internacional independiente de investigadores y periodistas ciudadanos que usan informaci\u00F3n de fuente abierta y redes sociales para investigar una gran variedad de temas, desde carteles de droga en M\u00E9xico y cr\u00EDmenes de lesa humanidad hasta el rastreo de armas qu\u00EDmicas en zonas de conflicto alrededor del mundo. Nosotros operamos en un campo \u00FAnico donde la tecnolog\u00EDa avanzada, las investigaciones forenses, el periodismo, y la transparencia y responsabilidad se unen.' # noqa description = 'Bellingcat es un colectivo internacional independiente de investigadores y periodistas ciudadanos que usan informaci\u00F3n de fuente abierta y redes sociales para investigar una gran variedad de temas, desde carteles de droga en M\u00E9xico y cr\u00EDmenes de lesa humanidad hasta el rastreo de armas qu\u00EDmicas en zonas de conflicto alrededor del mundo. Nosotros operamos en un campo \u00FAnico donde la tecnolog\u00EDa avanzada, las investigaciones forenses, el periodismo, y la transparencia y responsabilidad se unen.' # noqa: E501
publisher = 'Stichting Bellingcat' publisher = 'Stichting Bellingcat'
category = 'blog' category = 'blog'
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg' cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
View File
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Bellingcat(BasicNewsRecipe): class Bellingcat(BasicNewsRecipe):
title = 'Bellingcat' title = 'Bellingcat'
__author__ = 'bugmen00t' __author__ = 'bugmen00t'
description = 'Bellingcat est un groupe international ind\u00E9pendant de chercheurs, d\u0027enqu\u00EAteurs et de journalistes citoyens utilisant \u00E0 la fois: enqu\u00EAtes \u0027open source\u0027 et r\u00E9seaux sociaux, pour sonder une vari\u00E9t\u00E9 de sujets - trafiquants de drogue mexicains, crimes contre l\u0027humanit\u00E9, suivi de l\u0027utilisation d\u0027armes chimiques et conflits dans le monde entier. Nous op\u00E9rons dans un domaine unique dans lequel technologie de pointe, recherche m\u00E9dico-l\u00E9gale, journalisme, enqu\u00EAtes, transparence et responsabilit\u00E9 se rencontrent.' # noqa description = 'Bellingcat est un groupe international ind\u00E9pendant de chercheurs, d\u0027enqu\u00EAteurs et de journalistes citoyens utilisant \u00E0 la fois: enqu\u00EAtes \u0027open source\u0027 et r\u00E9seaux sociaux, pour sonder une vari\u00E9t\u00E9 de sujets - trafiquants de drogue mexicains, crimes contre l\u0027humanit\u00E9, suivi de l\u0027utilisation d\u0027armes chimiques et conflits dans le monde entier. Nous op\u00E9rons dans un domaine unique dans lequel technologie de pointe, recherche m\u00E9dico-l\u00E9gale, journalisme, enqu\u00EAtes, transparence et responsabilit\u00E9 se rencontrent.' # noqa: E501
publisher = 'Stichting Bellingcat' publisher = 'Stichting Bellingcat'
category = 'blog' category = 'blog'
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg' cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
View File
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Bellingcat(BasicNewsRecipe): class Bellingcat(BasicNewsRecipe):
title = '\u0411\u0435\u043B\u043B\u0438\u043D\u0433\u043A\u044D\u0442 (bell\u00BFngcat)' title = '\u0411\u0435\u043B\u043B\u0438\u043D\u0433\u043A\u044D\u0442 (bell\u00BFngcat)'
__author__ = 'bugmen00t' __author__ = 'bugmen00t'
description = 'Bellingcat \u2014 \u044D\u0442\u043E \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u0430\u044F \u043C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0430\u044F \u043A\u043E\u043C\u0430\u043D\u0434\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u0435\u0439 \u0438 \u0433\u0440\u0430\u0436\u0434\u0430\u043D\u0441\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u044E\u0442 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u0435 \u0438\u0441\u0442\u043E\u0447\u043D\u0438\u043A\u0438 \u0438 \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0441\u0435\u0442\u0438 \u0434\u043B\u044F \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0439 \u043D\u0430 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u2014 \u043E\u0442 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u043A\u0430\u0440\u0442\u0435\u043B\u0435\u0439 \u0438 \u043F\u0440\u0435\u0441\u0442\u0443\u043F\u043B\u0435\u043D\u0438\u0439 \u043F\u0440\u043E\u0442\u0438\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0430 \u0434\u043E \u043E\u0442\u0441\u043B\u0435\u0436\u0438\u0432\u0430\u043D\u0438\u044F \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u044F \u0445\u0438\u043C\u0438\u0447\u0435\u0441\u043A\u043E\u0433\u043E \u043E\u0440\u0443\u0436\u0438\u044F \u0438 \u043A\u043E\u043D\u0444\u043B\u0438\u043A\u0442\u043E\u0432 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u2014 \u0441\u043E\u0447\u0435\u0442\u0430\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0435 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438, \u043A\u0440\u0438\u043C\u0438\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u0438\u0437\u044B, \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0443, \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F \u0438 \u0431\u043E\u0440\u044C\u0431\u0443 \u0437\u0430 \u043F\u0440\u043E\u0437\u0440\u0430\u0447\u043D\u043E\u0441\u0442\u044C \u0438 \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0441\u0442\u044C.' description = 'Bellingcat \u2014 \u044D\u0442\u043E \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u0430\u044F \u043C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0430\u044F \u043A\u043E\u043C\u0430\u043D\u0434\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u0435\u0439 \u0438 \u0433\u0440\u0430\u0436\u0434\u0430\u043D\u0441\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u044E\u0442 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u0435 \u0438\u0441\u0442\u043E\u0447\u043D\u0438\u043A\u0438 \u0438 \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0441\u0435\u0442\u0438 \u0434\u043B\u044F \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0439 \u043D\u0430 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u2014 \u043E\u0442 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u043A\u0430\u0440\u0442\u0435\u043B\u0435\u0439 \u0438 \u043F\u0440\u0435\u0441\u0442\u0443\u043F\u043B\u0435\u043D\u0438\u0439 \u043F\u0440\u043E\u0442\u0438\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0430 \u0434\u043E \u043E\u0442\u0441\u043B\u0435\u0436\u0438\u0432\u0430\u043D\u0438\u044F \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u044F \u0445\u0438\u043C\u0438\u0447\u0435\u0441\u043A\u043E\u0433\u043E \u043E\u0440\u0443\u0436\u0438\u044F \u0438 \u043A\u043E\u043D\u0444\u043B\u0438\u043A\u0442\u043E\u0432 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u2014 \u0441\u043E\u0447\u0435\u0442\u0430\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0435 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438, \u043A\u0440\u0438\u043C\u0438\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u0438\u0437\u044B, \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0443, \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F \u0438 \u0431\u043E\u0440\u044C\u0431\u0443 \u0437\u0430 \u043F\u0440\u043E\u0437\u0440\u0430\u0447\u043D\u043E\u0441\u0442\u044C \u0438 \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0441\u0442\u044C.' # noqa: E501
# noqa
publisher = 'Stichting Bellingcat' publisher = 'Stichting Bellingcat'
category = 'blog' category = 'blog'
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg' cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
View File
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Bellingcat(BasicNewsRecipe): class Bellingcat(BasicNewsRecipe):
title = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 (bell\u00BFngcat)' title = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 (bell\u00BFngcat)'
__author__ = 'bugmen00t' __author__ = 'bugmen00t'
description = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 \u2014 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0438\u0439 \u043C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0438\u0439 \u043A\u043E\u043B\u0435\u043A\u0442\u0438\u0432 \u0434\u043E\u0441\u043B\u0456\u0434\u043D\u0438\u043A\u0456\u0432, \u0441\u043B\u0456\u0434\u0447\u0438\u0445 \u0456 \u0433\u0440\u043E\u043C\u0430\u0434\u044F\u043D\u0441\u044C\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0456\u0432, \u044F\u043A\u0456 \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u043E\u0432\u0443\u044E\u0442\u044C \u0432\u0456\u0434\u043A\u0440\u0438\u0442\u0456 \u0434\u0436\u0435\u0440\u0435\u043B\u0430 \u0442\u0430 \u0441\u043E\u0446\u0456\u0430\u043B\u044C\u043D\u0456 \u043C\u0435\u0440\u0435\u0436\u0456 \u0434\u043B\u044F \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F \u0440\u0456\u0437\u043D\u043E\u043C\u0430\u043D\u0456\u0442\u043D\u0438\u0445 \u0442\u0435\u043C \u2014 \u0432\u0456\u0434 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u044C\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u0431\u0430\u0440\u043E\u043D\u0456\u0432 \u0456 \u0437\u043B\u043E\u0447\u0438\u043D\u0456\u0432 \u043F\u0440\u043E\u0442\u0438 \u043B\u044E\u0434\u0441\u0442\u0432\u0430, \u0434\u043E \u0432\u0456\u0434\u0441\u0442\u0435\u0436\u0435\u043D\u043D\u044F \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u0430\u043D\u043D\u044F \u0445\u0456\u043C\u0456\u0447\u043D\u043E\u0457 \u0437\u0431\u0440\u043E\u0457 \u0442\u0430 \u043A\u043E\u043D\u0444\u043B\u0456\u043A\u0442\u0456\u0432 \u0443 \u0432\u0441\u044C\u043E\u043C\u0443 \u0441\u0432\u0456\u0442\u0456. \u041C\u0438 \u043F\u0440\u0430\u0446\u044E\u0454\u043C\u043E \u0432 \u0443\u043D\u0456\u043A\u0430\u043B\u044C\u043D\u0456\u0439 \u0441\u0444\u0435\u0440\u0456, \u0434\u0435 \u043F\u0435\u0440\u0435\u0434\u043E\u0432\u0456 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0441\u0443\u0434\u043E\u0432\u043E-\u043C\u0435\u0434\u0438\u0447\u043D\u0456 \u0434\u043E\u0441\u043B\u0456\u0434\u0436\u0435\u043D\u043D\u044F, \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0438\u043A\u0430, \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F, \u043F\u0440\u043E\u0437\u043E\u0440\u0456\u0441\u0442\u044C \u0456 \u0432\u0456\u0434\u043F\u043E\u0432\u0456\u0434\u0430\u043B\u044C\u043D\u0456\u0441\u0442\u044C \u043E\u0431\u2019\u0454\u0434\u043D\u0443\u044E\u0442\u044C\u0441\u044F.' description = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 \u2014 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0438\u0439 \u043C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0438\u0439 \u043A\u043E\u043B\u0435\u043A\u0442\u0438\u0432 \u0434\u043E\u0441\u043B\u0456\u0434\u043D\u0438\u043A\u0456\u0432, \u0441\u043B\u0456\u0434\u0447\u0438\u0445 \u0456 \u0433\u0440\u043E\u043C\u0430\u0434\u044F\u043D\u0441\u044C\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0456\u0432, \u044F\u043A\u0456 \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u043E\u0432\u0443\u044E\u0442\u044C \u0432\u0456\u0434\u043A\u0440\u0438\u0442\u0456 \u0434\u0436\u0435\u0440\u0435\u043B\u0430 \u0442\u0430 \u0441\u043E\u0446\u0456\u0430\u043B\u044C\u043D\u0456 \u043C\u0435\u0440\u0435\u0436\u0456 \u0434\u043B\u044F \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F \u0440\u0456\u0437\u043D\u043E\u043C\u0430\u043D\u0456\u0442\u043D\u0438\u0445 \u0442\u0435\u043C \u2014 \u0432\u0456\u0434 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u044C\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u0431\u0430\u0440\u043E\u043D\u0456\u0432 \u0456 \u0437\u043B\u043E\u0447\u0438\u043D\u0456\u0432 \u043F\u0440\u043E\u0442\u0438 \u043B\u044E\u0434\u0441\u0442\u0432\u0430, \u0434\u043E \u0432\u0456\u0434\u0441\u0442\u0435\u0436\u0435\u043D\u043D\u044F \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u0430\u043D\u043D\u044F \u0445\u0456\u043C\u0456\u0447\u043D\u043E\u0457 \u0437\u0431\u0440\u043E\u0457 \u0442\u0430 \u043A\u043E\u043D\u0444\u043B\u0456\u043A\u0442\u0456\u0432 \u0443 \u0432\u0441\u044C\u043E\u043C\u0443 \u0441\u0432\u0456\u0442\u0456. \u041C\u0438 \u043F\u0440\u0430\u0446\u044E\u0454\u043C\u043E \u0432 \u0443\u043D\u0456\u043A\u0430\u043B\u044C\u043D\u0456\u0439 \u0441\u0444\u0435\u0440\u0456, \u0434\u0435 \u043F\u0435\u0440\u0435\u0434\u043E\u0432\u0456 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0441\u0443\u0434\u043E\u0432\u043E-\u043C\u0435\u0434\u0438\u0447\u043D\u0456 \u0434\u043E\u0441\u043B\u0456\u0434\u0436\u0435\u043D\u043D\u044F, \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0438\u043A\u0430, \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F, \u043F\u0440\u043E\u0437\u043E\u0440\u0456\u0441\u0442\u044C \u0456 \u0432\u0456\u0434\u043F\u043E\u0432\u0456\u0434\u0430\u043B\u044C\u043D\u0456\u0441\u0442\u044C \u043E\u0431\u2019\u0454\u0434\u043D\u0443\u044E\u0442\u044C\u0441\u044F.' # noqa: E501
# noqa
publisher = 'Stichting Bellingcat' publisher = 'Stichting Bellingcat'
category = 'blog' category = 'blog'
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg' cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
View File
@ -17,13 +17,13 @@ class BenchmarkPl(BasicNewsRecipe):
extra_css = 'ul {list-style-type: none;}' extra_css = 'ul {list-style-type: none;}'
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
preprocess_regexps = [(re.compile(u'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', # noqa preprocess_regexps = [(re.compile(u'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', # noqa: E501, RUF039
re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(u'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(u'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa: RUF039
keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict( keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(
name='div', attrs={'class': ['m_zwykly', 'gallery']}), dict(id='article')] name='div', attrs={'class': ['m_zwykly', 'gallery']}), dict(id='article')]
remove_tags_after = dict(id='article') remove_tags_after = dict(id='article')
remove_tags = [dict(name='div', attrs={'class': ['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs={ 'background': 'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width': '210', 'cellspacing': '1', 'cellpadding': '4', 'border': '0', 'align': 'right'})] # noqa remove_tags = [dict(name='div', attrs={'class': ['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs={'background': 'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width': '210', 'cellspacing': '1', 'cellpadding': '4', 'border': '0', 'align': 'right'})] # noqa: E501
INDEX = 'http://www.benchmark.pl' INDEX = 'http://www.benchmark.pl'
feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'), feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
View File
@ -25,7 +25,6 @@ class bergfreunde_blog(BasicNewsRecipe):
__author__ = 'VoHe' __author__ = 'VoHe'
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
remove_javascript = True
remove_empty_feeds = True remove_empty_feeds = True
ignore_duplicate_articles = {'title', 'url'} ignore_duplicate_articles = {'title', 'url'}
View File
@ -63,12 +63,12 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
dict( dict(
attrs={'class': ['socialbar', 'social-sharing flank', 'vel', 'back']}), attrs={'class': ['socialbar', 'social-sharing flank', 'vel', 'back']}),
dict(name='img', attrs={'alt': 'logo'}), dict(name='img', attrs={'alt': 'logo'}),
dict(name='div', attrs={'class': re.compile('infoEl')}), dict(name='div', attrs={'class': re.compile(r'infoEl')}),
dict(name='span', attrs={'class': re.compile('loupe')}) dict(name='span', attrs={'class': re.compile(r'loupe')})
] ]
remove_tags_after = [ remove_tags_after = [
dict(name='div', attrs={'itemprop': re.compile('articleBody')}) dict(name='div', attrs={'itemprop': re.compile(r'articleBody')})
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
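
The re.compile('infoEl') to re.compile(r'infoEl') edits above follow ruff's preference for raw strings in regex positions: in a raw string, backslashes reach the regex engine untouched instead of first passing through Python's string-escape handling (the RUF039 noqa markers elsewhere in this diff flag non-raw patterns that were deliberately left alone). A small sketch:

    import re

    # Non-raw: '\d' only survives because Python passes unknown escapes
    # through; '\n' or '\b' would be transformed before re ever saw them,
    # so the backslash must be doubled to be safe.
    re.compile('articleBody\\d+')

    # Raw: the pattern you read is exactly what the regex engine receives.
    re.compile(r'articleBody\d+')
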
View File
@ -30,7 +30,7 @@ class BillOReilly(BasicNewsRecipe):
feeds.append(("O'Reilly Factor", articles_shows)) feeds.append(("O'Reilly Factor", articles_shows))
if articles_columns: if articles_columns:
feeds.append(("Newspaper Column", articles_columns)) feeds.append(('Newspaper Column', articles_columns))
return feeds return feeds
@ -49,8 +49,7 @@ class BillOReilly(BasicNewsRecipe):
continue continue
if url.startswith('/'): if url.startswith('/'):
url = 'http://www.billoreilly.com' + url + \ url = 'http://www.billoreilly.com' + url + '&dest=/pg/jsp/community/tvshowprint.jsp'
'&dest=/pg/jsp/community/tvshowprint.jsp'
self.log('\t\tFound article:', title) self.log('\t\tFound article:', title)
self.log('\t\t\t', url) self.log('\t\t\t', url)
View File
@ -57,8 +57,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
soup = self.index_to_soup('http://www.birminghammail.co.uk') soup = self.index_to_soup('http://www.birminghammail.co.uk')
cov = soup.find(attrs={'src': re.compile( cov = soup.find(attrs={'src': re.compile(r'http://images.icnetwork.co.uk/upl/birm')})
'http://images.icnetwork.co.uk/upl/birm')})
cov = str(cov) cov = str(cov)
cov2 = re.findall( cov2 = re.findall(
r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov) r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
View File
@ -27,15 +27,15 @@ class bleskRecipe(BasicNewsRecipe):
cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png' cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
extra_css = """ extra_css = '''
""" '''
remove_attributes = [] remove_attributes = []
remove_tags_before = dict(name='div', attrs={'id': ['boxContent']}) remove_tags_before = dict(name='div', attrs={'id': ['boxContent']})
remove_tags_after = dict(name='div', attrs={'class': ['artAuthors']}) remove_tags_after = dict(name='div', attrs={'class': ['artAuthors']})
remove_tags = [dict(name='div', attrs={'class': ['link_clanek']}), remove_tags = [dict(name='div', attrs={'class': ['link_clanek']}),
dict(name='div', attrs={'id': ['partHeader']}), dict(name='div', attrs={'id': ['partHeader']}),
dict(name='div', attrs={'id': ['top_bottom_box', 'lista_top']})] dict(name='div', attrs={'id': ['top_bottom_box', 'lista_top']})]
preprocess_regexps = [(re.compile(r'<div class="(textovytip|related)".*', preprocess_regexps = [(re.compile(r'<div class="(textovytip|related)".*',
re.DOTALL | re.IGNORECASE), lambda match: '</body>')] re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
View File
@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Blic(BasicNewsRecipe): class Blic(BasicNewsRecipe):
title = 'Blic' title = 'Blic'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' # noqa description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' # noqa: E501
publisher = 'RINGIER d.o.o.' publisher = 'RINGIER d.o.o.'
category = 'news, politics, Serbia' category = 'news, politics, Serbia'
oldest_article = 2 oldest_article = 2
@ -23,7 +23,7 @@ class Blic(BasicNewsRecipe):
masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png' masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png'
language = 'sr' language = 'sr'
publication_type = 'newspaper' publication_type = 'newspaper'
extra_css = """ extra_css = '''
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Georgia, serif1, serif} body{font-family: Georgia, serif1, serif}
@ -35,13 +35,13 @@ class Blic(BasicNewsRecipe):
.potpis{font-size: x-small; color: gray} .potpis{font-size: x-small; color: gray}
.article_info{font-size: small} .article_info{font-size: small}
img{margin-bottom: 0.8em; margin-top: 0.8em; display: block} img{margin-bottom: 0.8em; margin-top: 0.8em; display: block}
""" '''
conversion_options = { conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
} }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] # noqa: RUF039
remove_tags_before = dict(name='div', attrs={'id': 'article_info'}) remove_tags_before = dict(name='div', attrs={'id': 'article_info'})
remove_tags = [ remove_tags = [
dict(name=['object', 'link', 'meta', 'base', 'object', 'embed'])] dict(name=['object', 'link', 'meta', 'base', 'object', 'embed'])]
View File
@ -56,7 +56,7 @@ class Bloomberg(BasicNewsRecipe):
masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg' masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg'
description = ( description = (
'Bloomberg Businessweek helps global leaders stay ahead with insights and in-depth analysis on the people,' 'Bloomberg Businessweek helps global leaders stay ahead with insights and in-depth analysis on the people,'
' companies, events, and trends shaping today\'s complex, global economy.' " companies, events, and trends shaping today's complex, global economy."
) )
remove_empty_feeds = True remove_empty_feeds = True
@ -124,8 +124,8 @@ class Bloomberg(BasicNewsRecipe):
cat = '<div class="cat">' + data['primaryCategory'] + '</div>' cat = '<div class="cat">' + data['primaryCategory'] + '</div>'
if 'abstract' in data and data['abstract'] and data['abstract'] is not None: if 'abstract' in data and data['abstract'] and data['abstract'] is not None:
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join([x for x in data['abstract']]) + '</li></ul></div>' subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join(list(data['abstract'])) + '</li></ul></div>'
elif 'summary' in data and data['summary']: elif data.get('summary'):
subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>' subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>'
if 'byline' in data and data['byline'] is not None: if 'byline' in data and data['byline'] is not None:
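
Two idiom swaps in this Bloomberg hunk: 'summary' in data and data['summary'] becomes data.get('summary'), and the identity comprehension over data['abstract'] becomes list(data['abstract']). dict.get returns None for a missing key, so a single truthiness test covers both "key absent" and "value empty". A sketch with invented data:

    data = {'summary': 'Markets rallied.', 'abstract': ['Point one', 'Point two']}

    # Membership test plus a second lookup...
    if 'summary' in data and data['summary']:
        print(data['summary'])

    # ...folds into one call: None (missing) and '' (empty) are both falsy.
    if data.get('summary'):
        print(data['summary'])

    # list() copies any iterable; [x for x in ...] did the same thing with more code.
    subhead_items = list(data['abstract'])
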
View File
@ -58,7 +58,7 @@ class Bloomberg(BasicNewsRecipe):
'Bloomberg delivers business and markets news, data, analysis, and video' 'Bloomberg delivers business and markets news, data, analysis, and video'
' to the world, featuring stories from Businessweek and Bloomberg News.' ' to the world, featuring stories from Businessweek and Bloomberg News.'
) )
oldest_article = 1.2 # days oldest_article = 1.2 # days
resolve_internal_links = True resolve_internal_links = True
remove_empty_feeds = True remove_empty_feeds = True
cover_url = 'https://assets.bwbx.io/images/users/iqjWHBFdfxIU/ivUxvlPidC3M/v0/600x-1.jpg' cover_url = 'https://assets.bwbx.io/images/users/iqjWHBFdfxIU/ivUxvlPidC3M/v0/600x-1.jpg'
@ -134,8 +134,8 @@ class Bloomberg(BasicNewsRecipe):
cat = '<div class="cat">' + data['primaryCategory'] + '</div>' cat = '<div class="cat">' + data['primaryCategory'] + '</div>'
if 'abstract' in data and data['abstract'] and data['abstract'] is not None: if 'abstract' in data and data['abstract'] and data['abstract'] is not None:
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join([x for x in data['abstract']]) + '</li></ul></div>' subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join(list(data['abstract'])) + '</li></ul></div>'
elif 'summary' in data and data['summary']: elif data.get('summary'):
subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>' subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>'
if 'byline' in data and data['byline'] is not None: if 'byline' in data and data['byline'] is not None:
View File
@ -2,29 +2,29 @@ from urllib.parse import urljoin
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
_issue_url = "" _issue_url = ''
class BookforumMagazine(BasicNewsRecipe): class BookforumMagazine(BasicNewsRecipe):
title = "Bookforum" title = 'Bookforum'
description = ( description = (
"Bookforum is an American book review magazine devoted to books and " 'Bookforum is an American book review magazine devoted to books and '
"the discussion of literature. https://www.bookforum.com/print" 'the discussion of literature. https://www.bookforum.com/print'
) )
language = "en" language = 'en'
__author__ = "ping" __author__ = 'ping'
publication_type = "magazine" publication_type = 'magazine'
encoding = "utf-8" encoding = 'utf-8'
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
auto_cleanup = False auto_cleanup = False
compress_news_images = True compress_news_images = True
compress_news_images_auto_size = 8 compress_news_images_auto_size = 8
keep_only_tags = [dict(class_="blog-article")] keep_only_tags = [dict(class_='blog-article')]
remove_tags = [dict(name=["af-share-toggle", "af-related-articles"])] remove_tags = [dict(name=['af-share-toggle', 'af-related-articles'])]
extra_css = """ extra_css = '''
.blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; } .blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; }
.blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; } .blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
.blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; } .blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; }
@ -33,46 +33,46 @@ class BookforumMagazine(BasicNewsRecipe):
display: block; max-width: 100%; height: auto; display: block; max-width: 100%; height: auto;
} }
.blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; } .blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; }
""" '''
def preprocess_html(self, soup): def preprocess_html(self, soup):
# strip away links that's not needed # strip away links that's not needed
for ele in soup.select(".blog-article__header a"): for ele in soup.select('.blog-article__header a'):
ele.unwrap() ele.unwrap()
return soup return soup
def parse_index(self): def parse_index(self):
soup = self.index_to_soup( soup = self.index_to_soup(
_issue_url if _issue_url else "https://www.bookforum.com/print" _issue_url if _issue_url else 'https://www.bookforum.com/print'
) )
meta_ele = soup.find("meta", property="og:title") meta_ele = soup.find('meta', property='og:title')
if meta_ele: if meta_ele:
self.timefmt = f' [{meta_ele["content"]}]' self.timefmt = f' [{meta_ele["content"]}]'
cover_ele = soup.find("img", class_="toc-issue__cover") cover_ele = soup.find('img', class_='toc-issue__cover')
if cover_ele: if cover_ele:
self.cover_url = urljoin( self.cover_url = urljoin(
"https://www.bookforum.com", 'https://www.bookforum.com',
soup.find("img", class_="toc-issue__cover")["src"], soup.find('img', class_='toc-issue__cover')['src'],
) )
articles = {} articles = {}
for sect_ele in soup.find_all("div", class_="toc-articles__section"): for sect_ele in soup.find_all('div', class_='toc-articles__section'):
section_name = self.tag_to_string( section_name = self.tag_to_string(
sect_ele.find("a", class_="toc__anchor-links__link") sect_ele.find('a', class_='toc__anchor-links__link')
) )
for article_ele in sect_ele.find_all("article"): for article_ele in sect_ele.find_all('article'):
title_ele = article_ele.find("h1") title_ele = article_ele.find('h1')
sub_title_ele = article_ele.find(class_="toc-article__subtitle") sub_title_ele = article_ele.find(class_='toc-article__subtitle')
articles.setdefault(section_name, []).append( articles.setdefault(section_name, []).append(
{ {
"title": self.tag_to_string(title_ele), 'title': self.tag_to_string(title_ele),
"url": article_ele.find("a", class_="toc-article__link")[ 'url': article_ele.find('a', class_='toc-article__link')[
"href" 'href'
], ],
"description": self.tag_to_string(sub_title_ele) 'description': self.tag_to_string(sub_title_ele)
if sub_title_ele if sub_title_ele
else "", else '',
} }
) )
return articles.items() return articles.items()
View File
@ -22,9 +22,9 @@ class Borsen_dk(BasicNewsRecipe):
language = 'da' language = 'da'
keep_only_tags = [ keep_only_tags = [
dict(name="h1", attrs={'itemprop': 'headline'}), dict(name='h1', attrs={'itemprop': 'headline'}),
dict(name="div", attrs={'itemprob': 'datePublished'}), dict(name='div', attrs={'itemprob': 'datePublished'}),
dict(name="div", attrs={'itemprop': 'articleBody'}), dict(name='div', attrs={'itemprop': 'articleBody'}),
] ]
# Feed are found here: # Feed are found here:
View File
@ -42,24 +42,24 @@ def class_startswith(*prefixes):
# From: https://www3.bostonglobe.com/lifestyle/comics?arc404=true # From: https://www3.bostonglobe.com/lifestyle/comics?arc404=true
comics_to_fetch = { comics_to_fetch = {
"ADAM@HOME": 'ad', 'ADAM@HOME': 'ad',
"ARLO & JANIS": 'aj', 'ARLO & JANIS': 'aj',
# "CUL DE SAC": 'cds', # "CUL DE SAC": 'cds',
# "CURTIS": 'kfcrt', # "CURTIS": 'kfcrt',
"DILBERT": 'dt', 'DILBERT': 'dt',
"DOONESBURY": 'db', 'DOONESBURY': 'db',
"DUSTIN": 'kfdus', 'DUSTIN': 'kfdus',
"F MINUS": 'fm', 'F MINUS': 'fm',
"FOR BETTER OR WORSE": 'fb', 'FOR BETTER OR WORSE': 'fb',
# "GET FUZZY": 'gz', # "GET FUZZY": 'gz',
# "MOTHER GOOSE & GRIMM": 'tmmgg', # "MOTHER GOOSE & GRIMM": 'tmmgg',
# "JUMPSTART": 'jt', # "JUMPSTART": 'jt',
"MONTY": 'mt', 'MONTY': 'mt',
# "POOCH CAFE", # "POOCH CAFE",
"RHYMES WITH ORANGE": 'kfrwo', 'RHYMES WITH ORANGE': 'kfrwo',
# "ROSE IS ROSE": 'rr', # "ROSE IS ROSE": 'rr',
# "ZIPPY THE PINHEAD": 'kfzpy', # "ZIPPY THE PINHEAD": 'kfzpy',
"ZITS": 'kfzt' 'ZITS': 'kfzt'
} }
@ -77,10 +77,10 @@ def extract_json(raw_html):
def absolutize_url(url): def absolutize_url(url):
if url.startswith("//"): if url.startswith('//'):
return "https:" + url return 'https:' + url
if url.startswith('/'): if url.startswith('/'):
url = "https://www.bostonglobe.com" + url url = 'https://www.bostonglobe.com' + url
return url return url
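
For reference, the intended behaviour of absolutize_url above, which normalizes the three URL shapes mixed in the Globe's markup. A usage sketch, not recipe code (the hostnames in the first and last calls are invented):

    absolutize_url('//cdn.example.com/pic.jpg')  # -> 'https://cdn.example.com/pic.jpg'
    absolutize_url('/metro/story.html')          # -> 'https://www.bostonglobe.com/metro/story.html'
    absolutize_url('https://example.com/a')      # -> returned unchanged
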
@ -120,7 +120,7 @@ def main():
class BostonGlobeSubscription(BasicNewsRecipe): class BostonGlobeSubscription(BasicNewsRecipe):
title = "Boston Globe" title = 'Boston Globe'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal'
description = 'The Boston Globe' description = 'The Boston Globe'
language = 'en_US' language = 'en_US'
View File
@ -12,6 +12,7 @@ def class_as_string(x):
x = ' '.join(x) x = ' '.join(x)
return x return x
def class_startswith(*prefixes): def class_startswith(*prefixes):
def q(x): def q(x):
@ -24,18 +25,19 @@ def class_startswith(*prefixes):
return dict(attrs={'class': q}) return dict(attrs={'class': q})
def absolutize_url(url): def absolutize_url(url):
if url.startswith("//"): if url.startswith('//'):
return "https:" + url return 'https:' + url
if url.startswith('/'): if url.startswith('/'):
url = "https://www.bostonglobe.com" + url url = 'https://www.bostonglobe.com' + url
return url return url
class BostonGlobePrint(BasicNewsRecipe): class BostonGlobePrint(BasicNewsRecipe):
title = "Boston Globe | Print Edition" title = 'Boston Globe | Print Edition'
__author__ = 'Kovid Goyal, unkn0wn' __author__ = 'Kovid Goyal, unkn0wn'
description = 'The Boston Globe - Today\'s Paper' description = "The Boston Globe - Today's Paper"
language = 'en_US' language = 'en_US'
keep_only_tags = [ keep_only_tags = [
@ -70,7 +72,7 @@ class BostonGlobePrint(BasicNewsRecipe):
for image in soup.findAll('img', src=True): for image in soup.findAll('img', src=True):
if image['src'].endswith('750.jpg'): if image['src'].endswith('750.jpg'):
return 'https:' + image['src'] return 'https:' + image['src']
self.log("\nCover unavailable") self.log('\nCover unavailable')
cover = None cover = None
return cover return cover
@ -94,8 +96,8 @@ class BostonGlobePrint(BasicNewsRecipe):
desc = self.tag_to_string(d) desc = self.tag_to_string(d)
self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url) self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url)
feeds_dict[section].append({"title": title, "url": url, "description": desc}) feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
return [(section, articles) for section, articles in feeds_dict.items()] return list(feeds_dict.items())
def preprocess_raw_html(self, raw_html, url): def preprocess_raw_html(self, raw_html, url):
soup = self.index_to_soup(raw_html) soup = self.index_to_soup(raw_html)
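
The return rewrite above is ruff's unnecessary-comprehension fix (flake8-comprehensions family): the comprehension unpacks each (section, articles) pair only to rebuild it, while list(feeds_dict.items()) produces the same list of tuples directly. For example, with made-up data:

    feeds_dict = {'Metro': [{'title': 'A story'}], 'Sports': []}

    # The two expressions are equal element for element.
    assert [(s, a) for s, a in feeds_dict.items()] == list(feeds_dict.items())
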
View File
@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1467715002(BasicNewsRecipe): class AdvancedUserRecipe1467715002(BasicNewsRecipe):
title = 'Breaking Mad' title = 'Breaking Mad'
__author__ = 'bugmen00t' __author__ = 'bugmen00t'
description = 'Breaking Mad - пока смерть не насмешит нас. Breakingmad - это агрегатор самых странных и драматичных новостей с жизнеутверждающим эффектом. Breakingmad - это все, что вы должны знать о России и не только.' # noqa description = 'Breaking Mad - пока смерть не насмешит нас. Breakingmad - это агрегатор самых странных и драматичных новостей с жизнеутверждающим эффектом. Breakingmad - это все, что вы должны знать о России и не только.' # noqa: E501
publisher = 'BreakingMad' publisher = 'BreakingMad'
category = 'news' category = 'news'
cover_url = u'http://breakingmad.me/images/logo.png' cover_url = u'http://breakingmad.me/images/logo.png'
View File
@ -5,7 +5,6 @@ from __future__ import print_function
__license__ = 'GPL v3' __license__ = 'GPL v3'
import datetime import datetime
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@ -23,40 +22,39 @@ class brewiarz(BasicNewsRecipe):
next_days = 1 next_days = 1
def parse_index(self): def parse_index(self):
dec2rom_dict = {"01": "i", "02": "ii", "03": "iii", "04": "iv", dec2rom_dict = {'01': 'i', '02': 'ii', '03': 'iii', '04': 'iv',
"05": "v", "06": "vi", "07": "vii", "08": "viii", '05': 'v', '06': 'vi', '07': 'vii', '08': 'viii',
"09": "ix", "10": "x", "11": "xi", "12": "xii"} '09': 'ix', '10': 'x', '11': 'xi', '12': 'xii'}
weekday_dict = {"Sunday": "Niedziela", "Monday": "Poniedziałek", "Tuesday": "Wtorek", weekday_dict = {'Sunday': 'Niedziela', 'Monday': 'Poniedziałek', 'Tuesday': 'Wtorek',
"Wednesday": "Środa", "Thursday": "Czwartek", "Friday": "Piątek", "Saturday": "Sobota"} 'Wednesday': 'Środa', 'Thursday': 'Czwartek', 'Friday': 'Piątek', 'Saturday': 'Sobota'}
now = datetime.datetime.now() now = datetime.datetime.now()
feeds = [] feeds = []
for i in range(0, self.next_days): for i in range(self.next_days):
url_date = now + datetime.timedelta(days=i) url_date = now + datetime.timedelta(days=i)
url_date_month = url_date.strftime("%m") url_date_month = url_date.strftime('%m')
url_date_month_roman = dec2rom_dict[url_date_month] url_date_month_roman = dec2rom_dict[url_date_month]
url_date_day = url_date.strftime("%d") url_date_day = url_date.strftime('%d')
url_date_year = url_date.strftime("%Y")[2:] url_date_year = url_date.strftime('%Y')[2:]
url_date_weekday = url_date.strftime("%A") url_date_weekday = url_date.strftime('%A')
url_date_weekday_pl = weekday_dict[url_date_weekday] url_date_weekday_pl = weekday_dict[url_date_weekday]
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + \ url = ('http://brewiarz.pl/' + url_date_month_roman + '_' +
url_date_year + "/" + url_date_day + url_date_month + "/index.php3" url_date_year + '/' + url_date_day + url_date_month + '/index.php3')
articles = self.parse_pages(url) articles = self.parse_pages(url)
if articles: if articles:
title = url_date_weekday_pl + " " + url_date_day + \ title = (url_date_weekday_pl + ' ' + url_date_day +
"." + url_date_month + "." + url_date_year '.' + url_date_month + '.' + url_date_year)
feeds.append((title, articles)) feeds.append((title, articles))
else: else:
sectors = self.get_sectors(url) sectors = self.get_sectors(url)
for subpage in sectors: for subpage in sectors:
title = url_date_weekday_pl + " " + url_date_day + "." + \ title = (url_date_weekday_pl + ' ' + url_date_day + '.' +
url_date_month + "." + url_date_year + " - " + subpage.string url_date_month + '.' + url_date_year + ' - ' + subpage.string)
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + \ url = ('http://brewiarz.pl/' + url_date_month_roman + '_' + url_date_year +
"/" + url_date_day + url_date_month + \ '/' + url_date_day + url_date_month + '/' + subpage['href'])
"/" + subpage['href']
print(url) print(url)
articles = self.parse_pages(url) articles = self.parse_pages(url)
if articles: if articles:
@ -91,9 +89,8 @@ class brewiarz(BasicNewsRecipe):
sublinks = ol.findAll(name='a') sublinks = ol.findAll(name='a')
for sublink in sublinks: for sublink in sublinks:
link_title = self.tag_to_string( link_title = self.tag_to_string(
link) + " - " + self.tag_to_string(sublink) link) + ' - ' + self.tag_to_string(sublink)
link_url_print = re.sub( link_url_print = sublink['href'].replace('php3', 'php3?kr=_druk&wr=lg&')
'php3', 'php3?kr=_druk&wr=lg&', sublink['href'])
link_url = url[:-10] + link_url_print link_url = url[:-10] + link_url_print
current_articles.append({'title': link_title, current_articles.append({'title': link_title,
'url': link_url, 'description': '', 'date': ''}) 'url': link_url, 'description': '', 'date': ''})
@ -102,8 +99,7 @@ class brewiarz(BasicNewsRecipe):
continue continue
else: else:
link_title = self.tag_to_string(link) link_title = self.tag_to_string(link)
link_url_print = re.sub( link_url_print = link['href'].replace('php3', 'php3?kr=_druk&wr=lg&')
'php3', 'php3?kr=_druk&wr=lg&', link['href'])
link_url = url[:-10] + link_url_print link_url = url[:-10] + link_url_print
current_articles.append({'title': link_title, current_articles.append({'title': link_title,
'url': link_url, 'description': '', 'date': ''}) 'url': link_url, 'description': '', 'date': ''})
@ -145,7 +141,7 @@ class brewiarz(BasicNewsRecipe):
if x == tag: if x == tag:
break break
else: else:
print("Can't find", tag, "in", tag.parent) print("Can't find", tag, 'in', tag.parent)
continue continue
for r in reversed(tag.contents): for r in reversed(tag.contents):
tag.parent.insert(i, r) tag.parent.insert(i, r)
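
Two simplifications in the brewiarz recipe above: range(0, self.next_days) drops its redundant start argument, and re.sub('php3', ..., link['href']) becomes link['href'].replace('php3', ...), which is what let the module drop import re entirely. For a fixed literal pattern with no regex metacharacters the two are equivalent; a sketch:

    import re

    href = 'czytelnia.php3'

    # Regex substitution of a literal...
    print(re.sub('php3', 'php3?kr=_druk&wr=lg&', href))

    # ...reads more directly as plain string replacement.
    print(href.replace('php3', 'php3?kr=_druk&wr=lg&'))

    # Both print: czytelnia.php3?kr=_druk&wr=lg&
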
View File
@ -16,7 +16,7 @@ class AdvancedUserRecipe(BasicNewsRecipe):
cover_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg' cover_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
masthead_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg' masthead_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
remove_tags = [dict(attrs={'class': ['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}), # noqa remove_tags = [dict(attrs={'class': ['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}), # noqa: E501
dict(id=['header', 'artTools', 'context', 'interact', dict(id=['header', 'artTools', 'context', 'interact',
'footer-navigation', 'bwNet', 'copy', 'keyboardNavigationHint']), 'footer-navigation', 'bwNet', 'copy', 'keyboardNavigationHint']),
dict(name=['hjtrs', 'kud'])] dict(name=['hjtrs', 'kud'])]
View File
@ -22,10 +22,10 @@ class Business_insider(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
publication_type = 'newsportal' publication_type = 'newsportal'
masthead_url = 'http://static.businessinsider.com/assets/images/logos/tbi_print.jpg' masthead_url = 'http://static.businessinsider.com/assets/images/logos/tbi_print.jpg'
extra_css = """ extra_css = '''
body{font-family: Arial,Helvetica,sans-serif } body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block} img{margin-bottom: 0.4em; display:block}
""" '''
conversion_options = { conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
View File
@ -15,8 +15,6 @@ class BusinessStandardPrint(BasicNewsRecipe):
language = 'en_IN' language = 'en_IN'
masthead_url = 'https://bsmedia.business-standard.com/include/_mod/site/html5/images/business-standard-logo.png' masthead_url = 'https://bsmedia.business-standard.com/include/_mod/site/html5/images/business-standard-logo.png'
encoding = 'utf-8' encoding = 'utf-8'
resolve_internal_links = True
remove_empty_feeds = True
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
@ -64,7 +62,7 @@ class BusinessStandardPrint(BasicNewsRecipe):
if dt.weekday() == 6: if dt.weekday() == 6:
self.log.warn( self.log.warn(
'Business Standard Does Not Have A Print Publication On Sunday. The Reports' 'Business Standard Does Not Have A Print Publication On Sunday. The Reports'
' And Columns On This Page Today Appeared In The Newspaper\'s Saturday Edition.' " And Columns On This Page Today Appeared In The Newspaper's Saturday Edition."
) )
url = 'https://apibs.business-standard.com/category/today-paper?sortBy=' + today url = 'https://apibs.business-standard.com/category/today-paper?sortBy=' + today
raw = self.index_to_soup(url, raw=True) raw = self.index_to_soup(url, raw=True)
View File
@ -90,7 +90,7 @@ class BT(BasicNewsRecipe):
# Insert feeds in specified order, if available # Insert feeds in specified order, if available
feedSort = ['Editor\'s Note', 'Editors note'] feedSort = ["Editor's Note", 'Editors note']
for i in feedSort: for i in feedSort:
if i in sections: if i in sections:
feeds.append((i, sections[i])) feeds.append((i, sections[i]))
@ -98,8 +98,7 @@ class BT(BasicNewsRecipe):
# Done with the sorted feeds # Done with the sorted feeds
for i in feedSort: for i in feedSort:
if i in sections: sections.pop(i, None)
del sections[i]
# Append what is left over... # Append what is left over...
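
The BT hunk above collapses a guarded delete into sections.pop(i, None): pop with a default removes the key when present and quietly returns the default when it is not, so the membership test and its second dictionary lookup disappear. A sketch:

    sections = {"Editor's Note": ['first article']}

    # Before: test, then delete -- two lookups.
    if "Editor's Note" in sections:
        del sections["Editor's Note"]

    # After: one call, and no KeyError when the key is absent.
    sections.pop('Editors note', None)
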
View File
@ -5,8 +5,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
class CACM(BasicNewsRecipe): class CACM(BasicNewsRecipe):
title = "ACM CACM Magazine" title = 'ACM CACM Magazine'
description = "Published on day 1 of every month." description = 'Published on day 1 of every month.'
language = 'en' language = 'en'
oldest_article = 30 oldest_article = 30
max_articles_per_feed = 100 max_articles_per_feed = 100
@@ -17,16 +17,16 @@ class CACM(BasicNewsRecipe):
] ]
def get_cover_url(self): def get_cover_url(self):
""" '''
Parse out cover URL from cover page. Parse out cover URL from cover page.
Example: Example:
From: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.large.jpg?1647524668&1647524668 From: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.large.jpg?1647524668&1647524668
Get: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.jpg Get: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.jpg
""" '''
soup = self.index_to_soup("https://cacm.acm.org/") soup = self.index_to_soup('https://cacm.acm.org/')
a_img = soup.find("a", class_="menuCover") a_img = soup.find('a', class_='menuCover')
img_url = a_img.img["src"] img_url = a_img.img['src']
img_url = img_url.split("?")[0] img_url = img_url.split('?')[0]
img_url = img_url.replace(".large", "") img_url = img_url.replace('.large', '')
return img_url return img_url

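The CACM hunk shows the branch's quoting convention end to end: triple single quotes for docstrings, single quotes for inline strings, double quotes reserved for literals containing an apostrophe. A minimal standalone sketch of the same cover-URL logic (URL shortened for illustration):

def cover_url(src):
    '''Strip the query string and the .large suffix from a cover image URL.'''
    img_url = src.split('?')[0]           # drop cache-busting parameters
    return img_url.replace('.large', '')  # full-resolution variant

cover_url('https://cacm.acm.org/x/April2022.Cover.1000x1338.large.jpg?1647524668')
# -> 'https://cacm.acm.org/x/April2022.Cover.1000x1338.jpg'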
View File

@@ -9,8 +9,7 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
title = u'Calcalist' title = u'Calcalist'
language = 'he' language = 'he'
__author__ = 'marbs' __author__ = 'marbs'
extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }' # noqa extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }' # noqa: E501
simultaneous_downloads = 5
remove_javascript = True remove_javascript = True
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
oldest_article = 1 oldest_article = 1
@@ -23,34 +22,33 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
dict(name='div', attrs={'class': 'ArticleBodyComponent'}), dict(name='div', attrs={'class': 'ArticleBodyComponent'}),
] ]
remove_tags = [dict(name='p', attrs={'text': ['&nbsp;']})] remove_tags = [dict(name='p', attrs={'text': ['&nbsp;']})]
max_articles_per_feed = 100
preprocess_regexps = [ preprocess_regexps = [
(re.compile(r'<p>&nbsp;</p>', re.DOTALL | re.IGNORECASE), lambda match: '') (re.compile(r'<p>&nbsp;</p>', re.DOTALL | re.IGNORECASE), lambda match: '')
] ]
feeds = [ feeds = [
(u" דף הבית", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml"), (u' דף הבית', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml'),
(u" 24/7", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml"), (u' 24/7', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml'),
(u" באזז", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml"), (u' באזז', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml'),
(u" משפט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml"), (u' משפט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml'),
(u" רכב", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml"), (u' רכב', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml'),
(u" אחריות וסביבה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml"), (u' אחריות וסביבה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml'),
(u" דעות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml"), (u' דעות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml'),
(u" תיירות ותעופה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml"), (u' תיירות ותעופה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml'),
(u" קריירה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml"), (u' קריירה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml'),
(u" אחד העם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml"), (u' אחד העם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml'),
(u" המלצות ואזהרות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml"), (u' המלצות ואזהרות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml'),
(u" הייטק והון סיכון", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml"), (u' הייטק והון סיכון', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml'),
(u" חדשות טכנולוגיה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml"), (u' חדשות טכנולוגיה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml'),
(u" תקשורת", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml"), (u' תקשורת', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml'),
(u" אינטרנט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml"), (u' אינטרנט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml'),
(u" מכשירים וגאדג'טים", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml"), (u" מכשירים וגאדג'טים", u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml'),
(u" המדריך", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml"), (u' המדריך', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml'),
(u" אפליקציות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml"), (u' אפליקציות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml'),
(u" Play", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml"), (u' Play', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml'),
(u" הכסף", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml"), (u' הכסף', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml'),
(u" עולם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml"), (u' עולם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml'),
(u" פרסום ושיווק", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml"), (u' פרסום ושיווק', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml'),
(u" פנאי", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml"), (u' פנאי', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml'),
(u" עסקי ספורט", u"http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml") (u' עסקי ספורט', u'http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml')
] ]

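One detail this pass leaves alone: the u'' prefixes on the Calcalist feed titles. In Python 3 they are a no-op kept for Python 2 history, and a pyupgrade rule (UP025 under ruff, if enabled) would strip them; the quote normalization applies either way. A minimal check:

assert u' דף הבית' == ' דף הבית'  # the u prefix changes nothing in Python 3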
View File

@@ -60,20 +60,20 @@ class CanWestPaper(BasicNewsRecipe):
] ]
# un-comment the following six lines for the Vancouver Province # un-comment the following six lines for the Vancouver Province
# title = u'Vancouver Province' # # title = u'Vancouver Province'
# url_prefix = 'http://www.theprovince.com' # # url_prefix = 'http://www.theprovince.com'
# description = u'News from Vancouver, BC' # # description = u'News from Vancouver, BC'
# std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg' # # std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
# logo_url = 'vplogo.jpg' # # logo_url = 'vplogo.jpg'
# fp_tag = 'CAN_TP' # # fp_tag = 'CAN_TP'
# un-comment the following six lines for the Vancouver Sun # un-comment the following six lines for the Vancouver Sun
# title = u'Vancouver Sun' # # title = u'Vancouver Sun'
# url_prefix = 'http://www.vancouversun.com' # # url_prefix = 'http://www.vancouversun.com'
# description = u'News from Vancouver, BC' # # description = u'News from Vancouver, BC'
# std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg' # # std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
# logo_url = 'vslogo.jpg' # # logo_url = 'vslogo.jpg'
# fp_tag = 'CAN_VS' # # fp_tag = 'CAN_VS'
# un-comment the following six lines for the Calgary Herald # un-comment the following six lines for the Calgary Herald
title = u'Calgary Herald' title = u'Calgary Herald'
@@ -90,7 +90,7 @@ class CanWestPaper(BasicNewsRecipe):
# # std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg' # # std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
# # logo_url = 'ejlogo.jpg' # # logo_url = 'ejlogo.jpg'
# # fp_tag = 'CAN_EJ' # # fp_tag = 'CAN_EJ'
#
# un-comment the following six lines for the Ottawa Citizen # un-comment the following six lines for the Ottawa Citizen
# # title = u'Ottawa Citizen' # # title = u'Ottawa Citizen'
# # url_prefix = 'http://www.ottawacitizen.com' # # url_prefix = 'http://www.ottawacitizen.com'
@@ -98,7 +98,7 @@ class CanWestPaper(BasicNewsRecipe):
# # std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg' # # std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
# # logo_url = 'oclogo.jpg' # # logo_url = 'oclogo.jpg'
# # fp_tag = 'CAN_OC' # # fp_tag = 'CAN_OC'
#
# un-comment the following six lines for the Montreal Gazette # un-comment the following six lines for the Montreal Gazette
# # title = u'Montreal Gazette' # # title = u'Montreal Gazette'
# # url_prefix = 'http://www.montrealgazette.com' # # url_prefix = 'http://www.montrealgazette.com'
@@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }''' #photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})] keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'}, remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict( dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}), name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}), dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}), dict(name='div', attrs={'id': 'soundoff'}),
dict(name='div', attrs={'id': re.compile('flyer')}), dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})] dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self): def get_cover_url(self):
@@ -154,8 +154,7 @@ class CanWestPaper(BasicNewsRecipe):
except: except:
while daysback < 7: while daysback < 7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg' + \ cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg' + \
str((date.today() - timedelta(days=daysback)).day) + \ str((date.today() - timedelta(days=daysback)).day) + '/lg/' + self.fp_tag + '.jpg'
'/lg/' + self.fp_tag + '.jpg'
br = BasicNewsRecipe.get_browser(self) br = BasicNewsRecipe.get_browser(self)
try: try:
br.open(cover) br.open(cover)
@@ -164,24 +163,24 @@ class CanWestPaper(BasicNewsRecipe):
continue continue
break break
if daysback == 7: if daysback == 7:
self.log("\nCover unavailable") self.log('\nCover unavailable')
cover = None cover = None
return cover return cover
def fixChars(self, string): def fixChars(self, string):
# Replace lsquo (\x91) # Replace lsquo (\x91)
fixed = re.sub("\x91", "", string) fixed = string.replace('\x91', '')
# Replace rsquo (\x92) # Replace rsquo (\x92)
fixed = re.sub("\x92", "", fixed) fixed = fixed.replace('\x92', '')
# Replace ldquo (\x93) # Replace ldquo (\x93)
fixed = re.sub("\x93", "", fixed) fixed = fixed.replace('\x93', '')
# Replace rdquo (\x94) # Replace rdquo (\x94)
fixed = re.sub("\x94", "", fixed) fixed = fixed.replace('\x94', '')
# Replace ndash (\x96) # Replace ndash (\x96)
fixed = re.sub("\x96", "", fixed) fixed = fixed.replace('\x96', '')
# Replace mdash (\x97) # Replace mdash (\x97)
fixed = re.sub("\x97", "", fixed) fixed = fixed.replace('\x97', '')
fixed = re.sub("&#x2019;", "", fixed) fixed = fixed.replace('&#x2019;', '')
return fixed return fixed
def massageNCXText(self, description): def massageNCXText(self, description):
@@ -214,7 +213,7 @@ class CanWestPaper(BasicNewsRecipe):
divtags = soup.findAll('div', attrs={'id': ''}) divtags = soup.findAll('div', attrs={'id': ''})
if divtags: if divtags:
for div in divtags: for div in divtags:
del(div['id']) del div['id']
pgall = soup.find('div', attrs={'id': 'storyphoto'}) pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps if pgall is not None: # photo gallery perhaps
@@ -262,10 +261,10 @@ class CanWestPaper(BasicNewsRecipe):
if url.startswith('/'): if url.startswith('/'):
url = self.url_prefix + url url = self.url_prefix + url
if not url.startswith(self.url_prefix): if not url.startswith(self.url_prefix):
print("Rejected " + url) print('Rejected ' + url)
return return
if url in self.url_list: if url in self.url_list:
print("Rejected dup " + url) print('Rejected dup ' + url)
return return
self.url_list.append(url) self.url_list.append(url)
title = self.tag_to_string(atag, False) title = self.tag_to_string(atag, False)
@@ -277,8 +276,8 @@ class CanWestPaper(BasicNewsRecipe):
return return
dtag = adiv.find('div', 'content') dtag = adiv.find('div', 'content')
description = '' description = ''
print("URL " + url) print('URL ' + url)
print("TITLE " + title) print('TITLE ' + title)
if dtag is not None: if dtag is not None:
stag = dtag.span stag = dtag.span
if stag is not None: if stag is not None:
@@ -286,18 +285,18 @@ class CanWestPaper(BasicNewsRecipe):
description = self.tag_to_string(stag, False) description = self.tag_to_string(stag, False)
else: else:
description = self.tag_to_string(dtag, False) description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description) print('DESCRIPTION: ' + description)
if key not in articles: if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict( articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content='')) title=title, url=url, date='', description=description, author='', content=''))
def parse_web_index(key, keyurl): def parse_web_index(key, keyurl):
print("Section: " + key + ': ' + self.url_prefix + keyurl) print('Section: ' + key + ': ' + self.url_prefix + keyurl)
try: try:
soup = self.index_to_soup(self.url_prefix + keyurl) soup = self.index_to_soup(self.url_prefix + keyurl)
except: except:
print("Section: " + key + ' NOT FOUND') print('Section: ' + key + ' NOT FOUND')
return return
ans.append(key) ans.append(key)
mainsoup = soup.find('div', 'bodywrapper') mainsoup = soup.find('div', 'bodywrapper')
@@ -309,7 +308,7 @@ class CanWestPaper(BasicNewsRecipe):
for wdiv in mainsoup.findAll(attrs={'class': ['headline', 'featurecontent']}): for wdiv in mainsoup.findAll(attrs={'class': ['headline', 'featurecontent']}):
handle_article(wdiv, key) handle_article(wdiv, key)
for (k, url) in self.postmedia_index_pages: for k,url in self.postmedia_index_pages:
parse_web_index(k, url) parse_web_index(k, url)
ans = [(key, articles[key]) for key in ans if key in articles] ans = [(key, articles[key]) for key in ans if key in articles]
return ans return ans

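Two patterns recur in the CanWestPaper hunks: regexes that really are regexes gain an r'' raw prefix (re.compile(r'story')), while fixed-string substitutions drop re.sub in favour of str.replace, which is simpler and faster when no pattern is involved. The fixChars chain, condensed into a sketch:

def fix_chars(s):
    # strip cp1252 smart-punctuation code points with plain replace();
    # re.sub buys nothing for literal, fixed substrings
    for ch in ('\x91', '\x92', '\x93', '\x94', '\x96', '\x97'):
        s = s.replace(ch, '')
    return s.replace('&#x2019;', '')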
View File

@@ -4,7 +4,6 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1271446252(BasicNewsRecipe): class AdvancedUserRecipe1271446252(BasicNewsRecipe):
title = u'CanardPC' title = u'CanardPC'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100
language = 'fr' language = 'fr'
__author__ = 'zorgluf' __author__ = 'zorgluf'
max_articles_per_feed = 25 max_articles_per_feed = 25

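The CanardPC deletion fixes a duplicated class attribute: in a class body the last binding silently wins, so the earlier max_articles_per_feed = 100 was dead code and the later 25 already took effect. A minimal sketch of the pitfall:

class Recipe:
    max_articles_per_feed = 100  # dead: rebound below
    max_articles_per_feed = 25

print(Recipe.max_articles_per_feed)  # prints 25; only the last binding survives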
Some files were not shown because too many files have changed in this diff