Merge branch 'ruff-pep8-strict' of https://github.com/un-pogaz/calibre

Kovid Goyal 2025-01-25 13:56:37 +05:30
commit 7e61ea2248
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
1492 changed files with 19225 additions and 19853 deletions

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
__license__ = 'GPL v3'
@ -27,7 +26,7 @@ for name, src in sources.items():
os.chdir(iconset)
try:
for sz in (16, 32, 128, 256, 512, 1024):
iname = 'icon_{0}x{0}.png'.format(sz)
iname = f'icon_{sz}x{sz}.png'
iname2x = 'icon_{0}x{0}@2x.png'.format(sz // 2)
if src.endswith('.svg'):
subprocess.check_call(['rsvg-convert', src, '-w', str(sz), '-h', str(sz), '-o', iname])
@ -36,7 +35,7 @@ for name, src in sources.items():
if sz == 512:
shutil.copy2(src, iname)
else:
subprocess.check_call(['convert', src, '-resize', '{0}x{0}'.format(sz), iname])
subprocess.check_call(['convert', src, '-resize', f'{sz}x{sz}', iname])
if sz > 16:
shutil.copy2(iname, iname2x)
if sz > 512:
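
Throughout this commit, str.format() calls with positional placeholders are rewritten as f-strings, as in the icon_{0}x{0}.png line above. The two spellings produce identical strings; a quick illustrative check (not part of the diff):

    sz = 128
    assert 'icon_{0}x{0}.png'.format(sz) == f'icon_{sz}x{sz}.png'  # both give 'icon_128x128.png'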

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
__license__ = 'GPL v3'
@ -24,7 +23,7 @@ for name, src in sources.items():
try:
names = []
for sz in (16, 24, 32, 48, 64, 256):
iname = os.path.join('ico_temp', '{0}x{0}.png'.format(sz))
iname = os.path.join('ico_temp', f'{sz}x{sz}.png')
subprocess.check_call(['rsvg-convert', src, '-w', str(sz), '-h', str(sz), '-o', iname])
subprocess.check_call(['optipng', '-o7', '-strip', 'all', iname])
if sz >= 128:

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
import argparse

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
@ -27,7 +26,7 @@ def clone_node(node, parent):
def merge():
base = os.path.dirname(os.path.abspath(__file__))
ans = etree.fromstring(
'<svg xmlns="%s" xmlns:xlink="%s"/>' % (SVG_NS, XLINK_NS),
f'<svg xmlns="{SVG_NS}" xmlns:xlink="{XLINK_NS}"/>',
parser=etree.XMLParser(
recover=True, no_network=True, resolve_entities=False
)
@ -43,14 +42,14 @@ def merge():
recover=True, no_network=True, resolve_entities=False
)
)
symbol = ans.makeelement('{%s}symbol' % SVG_NS)
symbol = ans.makeelement('{%s}symbol' % SVG_NS) # noqa: UP031
symbol.set('viewBox', svg.get('viewBox'))
symbol.set('id', 'icon-' + f.rpartition('.')[0])
for child in svg.iterchildren('*'):
clone_node(child, symbol)
ans.append(symbol)
ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False)
ans = re.sub('<svg[^>]+>', '<svg style="display:none">', ans, count=1)
ans = re.sub(r'<svg[^>]+>', '<svg style="display:none">', ans, count=1)
return ans
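
The # noqa: UP031 added above silences ruff's printf-string-formatting rule for the '{%s}symbol' % SVG_NS line, where the literal braces make the % form clearer than an f-string. For comparison, the f-string spelling the rule would otherwise suggest (illustrative only; the variable name is made up):

    symbol_tag = '{%s}symbol' % SVG_NS   # kept as-is, suppressed with noqa: UP031
    symbol_tag = f'{{{SVG_NS}}}symbol'   # equivalent f-string; doubled braces yield literal { }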

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
__license__ = 'GPL v3'

View File

@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# calibre documentation build configuration file, created by
# sphinx-quickstart.py on Sun Mar 23 01:23:55 2008.
#
@ -47,11 +45,11 @@ templates_path = ['templates']
source_suffix = {'.rst': 'restructuredtext'}
# The master toctree document.
master_doc = 'index' if tags.has('online') else 'simple_index' # noqa
master_doc = 'index' if tags.has('online') else 'simple_index' # noqa: F821
# kill the warning about index/simple_index not being in a toctree
exclude_patterns = ['simple_index.rst'] if master_doc == 'index' else ['index.rst']
exclude_patterns.append('cli-options-header.rst')
if tags.has('gettext'): # noqa
if tags.has('gettext'): # noqa: F821
# Do not exclude anything as the strings must be translated. This will
# generate a warning about the documents not being in a toctree, just ignore
# it.
@ -64,7 +62,7 @@ language = os.environ.get('CALIBRE_OVERRIDE_LANG', 'en')
def generated_langs():
try:
return os.listdir(os.path.join(base, 'generated'))
except EnvironmentError as e:
except OSError as e:
if e.errno != errno.ENOENT:
raise
return ()
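
In Python 3, EnvironmentError (and the IOError caught a few hunks below) is simply an alias of OSError, so switching the except clauses to OSError does not change which exceptions are caught. A quick interpreter check (illustrative):

    >>> EnvironmentError is OSError, IOError is OSError
    (True, True)
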
@ -99,13 +97,13 @@ today_fmt = '%B %d, %Y'
unused_docs = ['global', 'cli/global']
locale_dirs = ['locale/']
title = '%s User Manual' % __appname__
title = f'{__appname__} User Manual'
needs_localization = language not in {'en', 'eng'}
if needs_localization:
import gettext
try:
t = gettext.translation('simple_index', locale_dirs[0], [language])
except IOError:
except OSError:
pass
else:
title = t.gettext(title)
@ -176,7 +174,7 @@ def sort_languages(x):
lc, name = x
if lc == language:
return ''
return sort_key(type(u'')(name))
return sort_key(str(name))
website = 'https://calibre-ebook.com'
@ -193,13 +191,13 @@ extlinks = {
}
del sort_languages, get_language
epub_author = u'Kovid Goyal'
epub_publisher = u'Kovid Goyal'
epub_copyright = u'© {} Kovid Goyal'.format(date.today().year)
epub_description = u'Comprehensive documentation for calibre'
epub_identifier = u'https://manual.calibre-ebook.com'
epub_scheme = u'url'
epub_uid = u'S54a88f8e9d42455e9c6db000e989225f'
epub_author = 'Kovid Goyal'
epub_publisher = 'Kovid Goyal'
epub_copyright = f'© {date.today().year} Kovid Goyal'
epub_description = 'Comprehensive documentation for calibre'
epub_identifier = 'https://manual.calibre-ebook.com'
epub_scheme = 'url'
epub_uid = 'S54a88f8e9d42455e9c6db000e989225f'
epub_tocdepth = 4
epub_tocdup = True
epub_cover = ('epub_cover.jpg', 'epub_cover_template.html')
@ -255,5 +253,5 @@ latex_show_pagerefs = True
latex_show_urls = 'footnote'
latex_elements = {
'papersize':'letterpaper',
'preamble': r'\renewcommand{\pageautorefname}{%s}' % _('page'),
'preamble': r'\renewcommand{\pageautorefname}{%s}' % _('page'), # noqa: UP031
}

View File

@ -195,13 +195,13 @@ details and examples.
lines = []
for cmd in COMMANDS:
parser = option_parser_for(cmd)()
lines += ['.. _calibredb-%s-%s:' % (language, cmd), '']
lines += [f'.. _calibredb-{language}-{cmd}:', '']
lines += [cmd, '~'*20, '']
usage = parser.usage.strip()
usage = [i for i in usage.replace('%prog', 'calibredb').splitlines()]
usage = usage.replace('%prog', 'calibredb').splitlines()
cmdline = ' '+usage[0]
usage = usage[1:]
usage = [re.sub(r'(%s)([^a-zA-Z0-9])'%cmd, r':command:`\1`\2', i) for i in usage]
usage = [re.sub(rf'({cmd})([^a-zA-Z0-9])', r':command:`\1`\2', i) for i in usage]
lines += ['.. code-block:: none', '', cmdline, '']
lines += usage
groups = [(None, None, parser.option_list)]
@ -240,14 +240,14 @@ def generate_ebook_convert_help(preamble, app):
parser, plumber = create_option_parser(['ebook-convert',
'dummyi.'+sorted(pl.file_types)[0], 'dummyo.epub', '-h'], default_log)
groups = [(pl.name+ ' Options', '', g.option_list) for g in
parser.option_groups if g.title == "INPUT OPTIONS"]
parser.option_groups if g.title == 'INPUT OPTIONS']
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
for pl in sorted(output_format_plugins(), key=lambda x: x.name):
parser, plumber = create_option_parser(['ebook-convert', 'd.epub',
'dummyi.'+pl.file_type, '-h'], default_log)
groups = [(pl.name+ ' Options', '', g.option_list) for g in
parser.option_groups if g.title == "OUTPUT OPTIONS"]
parser.option_groups if g.title == 'OUTPUT OPTIONS']
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
@ -257,7 +257,7 @@ def generate_ebook_convert_help(preamble, app):
def update_cli_doc(name, raw, language):
if isinstance(raw, bytes):
raw = raw.decode('utf-8')
path = 'generated/%s/%s.rst' % (language, name)
path = f'generated/{language}/{name}.rst'
old_raw = open(path, encoding='utf-8').read() if os.path.exists(path) else ''
if not os.path.exists(path) or old_raw != raw:
import difflib
@ -352,7 +352,7 @@ def cli_docs(language):
usage = [mark_options(i) for i in parser.usage.replace('%prog', cmd).splitlines()]
cmdline = usage[0]
usage = usage[1:]
usage = [i.replace(cmd, ':command:`%s`'%cmd) for i in usage]
usage = [i.replace(cmd, f':command:`{cmd}`') for i in usage]
usage = '\n'.join(usage)
preamble = CLI_PREAMBLE.format(cmd=cmd, cmdref=cmd + '-' + language, cmdline=cmdline, usage=usage)
if cmd == 'ebook-convert':
@ -382,7 +382,7 @@ def template_docs(language):
def localized_path(app, langcode, pagename):
href = app.builder.get_target_uri(pagename)
href = re.sub(r'generated/[a-z]+/', 'generated/%s/' % langcode, href)
href = re.sub(r'generated/[a-z]+/', f'generated/{langcode}/', href)
prefix = '/'
if langcode != 'en':
prefix += langcode + '/'
@ -397,7 +397,7 @@ def add_html_context(app, pagename, templatename, context, *args):
def guilabel_role(typ, rawtext, text, *args, **kwargs):
from sphinx.roles import GUILabel
text = text.replace(u'->', u'\N{THIN SPACE}\N{RIGHTWARDS ARROW}\N{THIN SPACE}')
text = text.replace('->', '\N{THIN SPACE}\N{RIGHTWARDS ARROW}\N{THIN SPACE}')
return GUILabel()(typ, rawtext, text, *args, **kwargs)
@ -405,7 +405,7 @@ def setup_man_pages(app):
documented_cmds = get_cli_docs()[0]
man_pages = []
for cmd, option_parser in documented_cmds:
path = 'generated/%s/%s' % (app.config.language, cmd)
path = f'generated/{app.config.language}/{cmd}'
man_pages.append((
path, cmd, cmd, 'Kovid Goyal', 1
))

View File

@ -49,8 +49,8 @@ class EPUBHelpBuilder(EpubBuilder):
imgname = container.href_to_name(img.get('src'), name)
fmt, width, height = identify(container.raw_data(imgname))
if width == -1:
raise ValueError('Failed to read size of: %s' % imgname)
img.set('style', 'width: %dpx; height: %dpx' % (width, height))
raise ValueError(f'Failed to read size of: {imgname}')
img.set('style', f'width: {width}px; height: {height}px')
def fix_opf(self, container):
spine_names = {n for n, l in container.spine_names}
@ -75,7 +75,7 @@ class EPUBHelpBuilder(EpubBuilder):
# Ensure that the cover-image property is set
cover_id = rmap['_static/' + self.config.epub_cover[0]]
for item in container.opf_xpath('//opf:item[@id="{}"]'.format(cover_id)):
for item in container.opf_xpath(f'//opf:item[@id="{cover_id}"]'):
item.set('properties', 'cover-image')
for item in container.opf_xpath('//opf:item[@href="epub-cover.xhtml"]'):
item.set('properties', 'svg calibre:title-page')

View File

@ -32,7 +32,7 @@ class DemoTool(Tool):
def create_action(self, for_toolbar=True):
# Create an action, this will be added to the plugins toolbar and
# the plugins menu
ac = QAction(get_icons('images/icon.png'), 'Magnify fonts', self.gui) # noqa
ac = QAction(get_icons('images/icon.png'), 'Magnify fonts', self.gui) # noqa: F821
if not for_toolbar:
# Register a keyboard shortcut for this toolbar action. We only
# register it for the action created for the menu, not the toolbar,

View File

@ -76,5 +76,3 @@ class InterfacePluginDemo(InterfaceActionBase):
ac = self.actual_plugin_
if ac is not None:
ac.apply_settings()

View File

@ -55,7 +55,7 @@ class DemoDialog(QDialog):
self.l.addWidget(self.view_button)
self.update_metadata_button = QPushButton(
'Update metadata in a book\'s files', self)
"Update metadata in a book's files", self)
self.update_metadata_button.clicked.connect(self.update_metadata)
self.l.addWidget(self.update_metadata_button)

View File

@ -54,8 +54,8 @@ class checkbox(nodes.Element):
def visit_checkbox(self, node):
cid = node['ids'][0]
node['classes'] = []
self.body.append('<input id="{0}" type="checkbox" />'
'<label for="{0}">&nbsp;</label>'.format(cid))
self.body.append(f'<input id="{cid}" type="checkbox" />'
f'<label for="{cid}">&nbsp;</label>')
def modify_li(li):
@ -66,7 +66,7 @@ def modify_li(li):
li['classes'].append('leaf-node')
else:
c = checkbox()
c['ids'] = ['collapse-checkbox-{}'.format(next(id_counter))]
c['ids'] = [f'collapse-checkbox-{next(id_counter)}']
li.insert(0, c)

View File

@ -89,5 +89,6 @@ def generate_template_language_help(language, log):
a(POSTAMBLE)
return ''.join(output)
if __name__ == '__main__':
generate_template_language_help()

View File

@ -21,10 +21,11 @@ quote-style = 'single'
[tool.ruff.lint]
ignore = ['E402', 'E722', 'E741']
select = ['E', 'F', 'I', 'W', 'INT']
select = ['E', 'F', 'I', 'W', 'INT', 'PIE794']
unfixable = ['PIE794']
[tool.ruff.lint.per-file-ignores]
"src/calibre/ebooks/unihandecode/*codepoints.py" = ['E501', 'W191']
"src/calibre/ebooks/unihandecode/*codepoints.py" = ['E501']
"src/qt/*.py" = ['I']
"src/qt/*.pyi" = ['I']

View File

@ -17,6 +17,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
use_archive = True
def E(parent, name, text='', **attrs):
ans = parent.makeelement(name, **attrs)
ans.text = text
@ -60,8 +61,8 @@ if use_archive:
data = json.loads(raw)
body = root.xpath('//body')[0]
article = E(body, 'article')
E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
try:
date = data['dateModified']
@ -95,7 +96,7 @@ else:
for child in tuple(body):
body.remove(child)
article = E(body, 'article')
E(article, 'div', replace_entities(data['subheadline']) , style='color: red; font-size:small; font-weight:bold;')
E(article, 'div', replace_entities(data['subheadline']), style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', replace_entities(data['headline']))
E(article, 'div', replace_entities(data['description']), style='font-style: italic; color:#202020;')
if data['dateline'] is None:
@ -157,7 +158,7 @@ class Economist(BasicNewsRecipe):
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
__author__ = "Kovid Goyal"
__author__ = 'Kovid Goyal'
description = (
'Published since September 1843 to take part in “a severe contest between intelligence, which presses forward, and '
'an unworthy, timid ignorance obstructing our progress.”'
@ -170,7 +171,7 @@ class Economist(BasicNewsRecipe):
resolve_internal_links = True
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
dict(attrs={'aria-label': "Article Teaser"}),
dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={
'class': [
'dblClkTrk', 'ec-article-info', 'share_inline_header',
@ -224,13 +225,13 @@ class Economist(BasicNewsRecipe):
def parse_index(self):
# return self.economist_test_article()
soup = self.index_to_soup('https://www.economist.com/hidden-content/1843magazine-hub')
script_tag = soup.find("script", id="__NEXT_DATA__")
script_tag = soup.find('script', id='__NEXT_DATA__')
if script_tag is None:
raise ValueError('No script tag with JSON data found in the weeklyedition archive')
data = json.loads(script_tag.string)
content_id = data['props']['pageProps']['content'][0]['tegID'].split('/')[-1]
query = {
'query': 'query HubsDataQuery($id:String!$size:Int!){canonical(ref:$id){id headline description url{canonical __typename}image{ident{url{canonical __typename}width height __typename}__typename}text(mode:"hub" format:"json")hasPart(size:$size){parts{id title:headline isPartOf{context{title:headline __typename}__typename}hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}__typename}}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa
'query': 'query HubsDataQuery($id:String!$size:Int!){canonical(ref:$id){id headline description url{canonical __typename}image{ident{url{canonical __typename}width height __typename}__typename}text(mode:"hub" format:"json")hasPart(size:$size){parts{id title:headline isPartOf{context{title:headline __typename}__typename}hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}__typename}}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa: E501
'operationName': 'HubsDataQuery',
'variables': '{{"id":"/content/{}","size":40}}'.format(content_id),
}
@ -247,22 +248,22 @@ class Economist(BasicNewsRecipe):
self.description = data['description']
feeds_dict = defaultdict(list)
for part in safe_dict(data, "hasPart", "parts"):
for part in safe_dict(data, 'hasPart', 'parts'):
section = part['title']
self.log(section)
for art in safe_dict(part, "hasPart", "parts"):
title = safe_dict(art, "title")
desc = safe_dict(art, "rubric") or ''
sub = safe_dict(art, "flyTitle") or ''
for art in safe_dict(part, 'hasPart', 'parts'):
title = safe_dict(art, 'title')
desc = safe_dict(art, 'rubric') or ''
sub = safe_dict(art, 'flyTitle') or ''
if sub and section != sub:
desc = sub + ' :: ' + desc
pt = PersistentTemporaryFile('.html')
pt.write(json.dumps(art).encode('utf-8'))
pt.close()
url = 'file:///' + pt.name
feeds_dict[section].append({"title": title, "url": url, "description": desc})
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
self.log('\t', title, '\n\t\t', desc)
return [(section, articles) for section, articles in feeds_dict.items()]
return list(feeds_dict.items())
def populate_article_metadata(self, article, soup, first):
article.url = soup.find('h1')['title']
@ -311,26 +312,26 @@ class Economist(BasicNewsRecipe):
return ans
def economist_parse_index(self, soup):
script_tag = soup.find("script", id="__NEXT_DATA__")
script_tag = soup.find('script', id='__NEXT_DATA__')
if script_tag is not None:
data = json.loads(script_tag.string)
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
self.title = safe_dict(data, "props", "pageProps", "content", "headline")
self.title = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
# self.cover_url = 'https://mma.prnewswire.com/media/2275620/The_Economist_The_World_Ahead_2024.jpg?w=600'
feeds = []
for coll in safe_dict(data, "props", "pageProps", "content", "collections"):
section = safe_dict(coll, "headline") or ''
for coll in safe_dict(data, 'props', 'pageProps', 'content', 'collections'):
section = safe_dict(coll, 'headline') or ''
self.log(section)
articles = []
for part in safe_dict(coll, "hasPart", "parts"):
title = safe_dict(part, "headline") or ''
url = safe_dict(part, "url", "canonical") or ''
for part in safe_dict(coll, 'hasPart', 'parts'):
title = safe_dict(part, 'headline') or ''
url = safe_dict(part, 'url', 'canonical') or ''
if not title or not url:
continue
desc = safe_dict(part, "description") or ''
sub = safe_dict(part, "subheadline") or ''
desc = safe_dict(part, 'description') or ''
sub = safe_dict(part, 'subheadline') or ''
if sub:
desc = sub + ' :: ' + desc
self.log('\t', title, '\n\t', desc, '\n\t\t', url)
@ -341,7 +342,6 @@ class Economist(BasicNewsRecipe):
# }}}
def preprocess_raw_html(self, raw, url):
# open('/t/raw.html', 'wb').write(raw.encode('utf-8'))
if use_archive:
@ -358,9 +358,9 @@ class Economist(BasicNewsRecipe):
cleanup_html_article(root)
if '/interactive/' in url:
return '<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' \
+ 'This article is supposed to be read in a browser' \
+ '</em></article></body></html>'
return ('<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>'
'This article is supposed to be read in a browser.'
'</em></article></body></html>')
for div in root.xpath('//div[@class="lazy-image"]'):
noscript = list(div.iter('noscript'))

View File

@ -36,22 +36,21 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
remove_empty_feeds = True
keep_only_tags = [
dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']}) # noqa
dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']}) # noqa: E501
]
remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']})
remove_tags_after = dict(
name='div', attrs={'class': ['related-news', 'col']})
remove_tags_after = dict(name='div', attrs={'class': ['related-news', 'col']})
remove_tags = [
dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict( name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa
dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict(name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa: E501
]
extra_css = """
extra_css = '''
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
"""
'''
preprocess_regexps = [(re.compile(
r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]

View File

@ -9,8 +9,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
def classes(classes):
q = frozenset(classes.split(' '))
return dict(attrs={
'class': lambda x: x and frozenset(x.split()).intersection(q)})
return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
class Minutes(BasicNewsRecipe):

View File

@ -28,7 +28,7 @@ class DrawAndCook(BasicNewsRecipe):
def parse_index(self):
feeds = []
for title, url in [
("They Draw and Cook", "http://www.theydrawandcook.com/")
('They Draw and Cook', 'http://www.theydrawandcook.com/')
]:
articles = self.make_links(url)
if articles:

View File

@ -5,11 +5,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TheMITPressReader(BasicNewsRecipe):
title = "The MIT Press Reader"
title = 'The MIT Press Reader'
__author__ = 'yodha8'
language = 'en'
description = ("Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors."
" This recipe pulls articles from the past 7 days.")
description = ('Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors.'
' This recipe pulls articles from the past 7 days.')
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True

View File

@ -38,9 +38,9 @@ class ABCNews(BasicNewsRecipe):
if d and isinstance(d, str):
self.oldest_article = float(d)
# auto_cleanup = True # enable this as a backup option if recipe stops working
# auto_cleanup = True # enable this as a backup option if recipe stops working
# use_embedded_content = False # if set to true will assume that all the article content is within the feed (i.e. won't try to fetch more data)
# use_embedded_content = False # if set to true will assume that all the article content is within the feed (i.e. won't try to fetch more data)
no_stylesheets = True
remove_javascript = True
@ -57,7 +57,7 @@ class ABCNews(BasicNewsRecipe):
# Clear out all the unwanted html tags:
# ************************************
remove_tags = [
# dict(name='aside', attrs={'name': re.compile(aside_reg_exp, re.IGNORECASE)})
# dict(name='aside', attrs={'name': re.compile(aside_reg_exp, re.IGNORECASE)})
{
'name': ['meta', 'link', 'noscript', 'aside']
},
@ -98,12 +98,12 @@ class ABCNews(BasicNewsRecipe):
('Health', 'https://www.abc.net.au/news/feed/9167762/rss.xml'),
('Arts and Entertainment', 'https://www.abc.net.au/news/feed/472/rss.xml'),
('Fact Check', 'https://www.abc.net.au/news/feed/5306468/rss.xml'),
# ('Adelaide', 'https://www.abc.net.au/news/feed/8057540/rss.xml'), #enable by removing # at start of line
# ('Brisbane', 'https://www.abc.net.au/news/feed/8053540/rss.xml'), #enable by removing # at start of line
# ('Canberra', 'https://www.abc.net.au/news/feed/8057234/rss.xml'), #enable by removing # at start of line
# ('Darwin', 'https://www.abc.net.au/news/feed/8057648/rss.xml'), #enable by removing # at start of line
# ('Hobart', 'https://www.abc.net.au/news/feed/8054562/rss.xml'), #enable by removing # at start of line
# ('Melbourne', 'https://www.abc.net.au/news/feed/8057136/rss.xml'), #enable by removing # at start of line
# ('Perth', 'https://www.abc.net.au/news/feed/8057096/rss.xml'), #enable by removing # at start of line
# ('Sydney', 'https://www.abc.net.au/news/feed/8055316/rss.xml'), #enable by removing # at start of line
# ('Adelaide', 'https://www.abc.net.au/news/feed/8057540/rss.xml'), #enable by removing # at start of line
# ('Brisbane', 'https://www.abc.net.au/news/feed/8053540/rss.xml'), #enable by removing # at start of line
# ('Canberra', 'https://www.abc.net.au/news/feed/8057234/rss.xml'), #enable by removing # at start of line
# ('Darwin', 'https://www.abc.net.au/news/feed/8057648/rss.xml'), #enable by removing # at start of line
# ('Hobart', 'https://www.abc.net.au/news/feed/8054562/rss.xml'), #enable by removing # at start of line
# ('Melbourne', 'https://www.abc.net.au/news/feed/8057136/rss.xml'), #enable by removing # at start of line
# ('Perth', 'https://www.abc.net.au/news/feed/8057096/rss.xml'), #enable by removing # at start of line
# ('Sydney', 'https://www.abc.net.au/news/feed/8055316/rss.xml'), #enable by removing # at start of line
]

View File

@ -47,13 +47,13 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
if d and isinstance(d, str):
self.oldest_article = float(d)
extra_css = """
extra_css = '''
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h3{font-family: sans-serif; font-size:120%; font-weight:bold; text-align: justify; }
h2{font-family: sans-serif; font-size:100%; font-weight:bold; text-align: justify; }
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
"""
'''
feeds = [

View File

@ -28,6 +28,6 @@ class Acrimed(BasicNewsRecipe):
lambda m: '<title>' + m.group(1) + '</title>'),
(re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]
extra_css = """
extra_css = '''
.chapo{font-style:italic; margin: 1em 0 0.5em}
"""
'''

View File

@ -85,9 +85,10 @@ class ADRecipe(BasicNewsRecipe):
def print_version(self, url):
parts = url.split('/')
print_url = 'http://' + parts[2] + '/' + parts[3] + '/' + parts[4] + '/' + parts[5] + '/' \
+ parts[10] + '/' + parts[7] + '/print/' + \
parts[8] + '/' + parts[9] + '/' + parts[13]
print_url = 'http://' + '/'.join([
parts[2], parts[3], parts[4], parts[5], parts[10],
parts[7], 'print', parts[8], parts[9], parts[13],
])
return print_url

View File

@ -33,7 +33,7 @@ class Adevarul(BasicNewsRecipe):
]
remove_tags = [
dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'}) # noqa
dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'}) # noqa: E501
]
remove_tags_after = [

View File

@ -21,7 +21,7 @@ class AdventureGamers(BasicNewsRecipe):
remove_javascript = True
use_embedded_content = False
INDEX = u'http://www.adventuregamers.com'
extra_css = """
extra_css = '''
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
.pageheader_title,.page_title{font-size: xx-large; color: #394128}
.pageheader_byline{font-size: small; font-weight: bold; color: #394128}
@ -32,7 +32,7 @@ class AdventureGamers(BasicNewsRecipe):
.score_header{font-size: large; color: #50544A}
img{margin-bottom: 1em;}
body{font-family: 'Open Sans',Helvetica,Arial,sans-serif}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@ -8,13 +8,14 @@ def absurl(url):
if url.startswith('/'):
return 'https://www.afr.com' + url
class afr(BasicNewsRecipe):
title = 'Australian Financial Review'
__author__ = 'unkn0wn'
description = (
'For more than 65 years The Australian Financial Review has been the authority on business,'
' finance and investment news in Australia. It has a reputation for independent, award-winning '
'journalism and is essential reading for Australia\'s business and investor community.'
"journalism and is essential reading for Australia's business and investor community."
)
masthead_url = 'https://www.nineforbrands.com.au/wp-content/uploads/2020/08/AFR-DHOSP-Logo-black-RGB.png'
encoding = 'utf-8'
@ -24,7 +25,6 @@ class afr(BasicNewsRecipe):
timefmt = ' [%d %b %Y]'
max_articles_per_feed = 25
no_stylesheets = True
remove_empty_feeds = True
remove_attributes = ['style', 'height', 'width']
keep_only_tags = [
@ -39,7 +39,7 @@ class afr(BasicNewsRecipe):
dict(name=['button', 'aside', 'svg']),
]
remove_tags_after= [ dict(name='aside', attrs={'id':'stickyContainer'})]
remove_tags_after= [dict(name='aside', attrs={'id':'stickyContainer'})]
extra_css = '''
#img-cap {font-size:small; text-align:center;}

View File

@ -36,9 +36,9 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
'''
def default_cover(self, cover_file):
"""
'''
Crée une couverture personnalisée avec le logo
"""
'''
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
@ -54,7 +54,7 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
weekday = french_weekday[wkd]
month = french_month[today.month]
date_str = f"{weekday} {today.day} {month} {today.year}"
date_str = f'{weekday} {today.day} {month} {today.year}'
edition = today.strftime('Édition de %Hh')
# Image de base

View File

@ -6,7 +6,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Agents(BasicNewsRecipe):
title = u'\u00AB\u0410\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E\u00BB'
description = u'\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u0430\u043D\u043D\u043E\u0435 \u0432\u044B\u0445\u043E\u0434\u0446\u0430\u043C\u0438 \u0438\u0437 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432\u0430 \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u043C\u043E\u0433\u043E \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u041F\u0440\u043E\u0435\u043A\u0442\u00BB.' # noqa
description = u'\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u0430\u043D\u043D\u043E\u0435 \u0432\u044B\u0445\u043E\u0434\u0446\u0430\u043C\u0438 \u0438\u0437 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432\u0430 \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u043C\u043E\u0433\u043E \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u041F\u0440\u043E\u0435\u043A\u0442\u00BB.' # noqa: E501
__author__ = 'bugmen00t'
publisher = 'Project Media'
publication_type = 'news'

View File

@ -21,9 +21,9 @@ class AlJazeera(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
extra_css = """
extra_css = '''
body{font-family: Arial,sans-serif}
"""
'''
conversion_options = {
'comment': description, 'tags': category,
'publisher': publisher, 'language': language

View File

@ -22,7 +22,7 @@ class AlMasryAlyoum(BasicNewsRecipe):
category = 'News'
publication_type = 'newsportal'
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } ' # noqa
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } ' # noqa: E501
keep_only_tags = [
dict(name='div', attrs={'class': ['article']})

View File

@ -110,7 +110,7 @@ class AlMonitor(BasicNewsRecipe):
title = title[0:120] + '...'
href = link.get('href')
if not href:
self._p("BAD HREF: " + str(link))
self._p('BAD HREF: ' + str(link))
return
self.queue_article_link(section, href, title)
@ -158,7 +158,7 @@ class AlMonitor(BasicNewsRecipe):
age = (datetime.datetime.now() - date).days
if (age > self.oldest_article):
return "too old"
return 'too old'
return False
def scrape_article_date(self, soup):
@ -174,7 +174,7 @@ class AlMonitor(BasicNewsRecipe):
def date_from_string(self, datestring):
try:
# eg: Posted September 17, 2014
dt = datetime.datetime.strptime(datestring, "Posted %B %d, %Y")
dt = datetime.datetime.strptime(datestring, 'Posted %B %d, %Y')
except:
dt = None
@ -203,11 +203,10 @@ class AlMonitor(BasicNewsRecipe):
return self.tag_to_string(n).strip()
def _dbg_soup_node(self, node):
s = ' cls: ' + str(node.get('class')).strip() + \
' id: ' + str(node.get('id')).strip() + \
' role: ' + str(node.get('role')).strip() + \
' txt: ' + self.text(node)
return s
return (' cls: ' + str(node.get('class')).strip() +
' id: ' + str(node.get('id')).strip() +
' role: ' + str(node.get('role')).strip() +
' txt: ' + self.text(node))
def _p(self, msg):
curframe = inspect.currentframe()

View File

@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AlbertMohlersBlog(BasicNewsRecipe):
title = u'Albert Mohler\'s Blog'
title = u"Albert Mohler's Blog"
__author__ = 'Peter Grungi'
language = 'en'
oldest_article = 90
@ -13,8 +13,7 @@ class AlbertMohlersBlog(BasicNewsRecipe):
auto_cleanup = True
cover_url = 'http://www.albertmohler.com/wp-content/themes/albert-mohler-v5/img/logo-am-lg.gif'
publisher = 'Albert Mohler'
language = 'en'
author = 'Albert Mohler'
feeds = [(u'Albert Mohler\'s Blog',
feeds = [(u"Albert Mohler's Blog",
u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]

View File

@ -43,7 +43,7 @@ class ALD(BasicNewsRecipe):
# Extract a list of dates from the page.
# Subset this out to the list of target dates for extraction.
date_list = []
for div in soup.findAll('div', attrs={'id': "dayheader"}):
for div in soup.findAll('div', attrs={'id': 'dayheader'}):
date_list.append(self.tag_to_string(div))
date_list_clean = [re.sub(r'[^\w]', ' ', date) for date in date_list]
date_list_bool = [
@ -54,14 +54,14 @@ class ALD(BasicNewsRecipe):
# Process each paragraph one by one.
# Stop when the text of the previous div is not in the target date list.
for div in soup.findAll('div', attrs={'class': "mobile-front"}):
for div in soup.findAll('div', attrs={'class': 'mobile-front'}):
for p in div.findAll('p'):
if self.tag_to_string(p.findPreviousSibling('div')) in compress_date:
if p.find('a'):
title = self.tag_to_string(p)
link = p.find('a')['href']
if self.tag_to_string(p.findPreviousSibling('h3')
) == "Articles of Note":
) == 'Articles of Note':
articles_note.append({
'title': title,
'url': link,
@ -69,7 +69,7 @@ class ALD(BasicNewsRecipe):
'date': ''
})
elif self.tag_to_string(p.findPreviousSibling('h3')
) == "New Books":
) == 'New Books':
new_books.append({
'title': title,
'url': link,

View File

@ -29,6 +29,6 @@ class AlejaKomiksu(BasicNewsRecipe):
def skip_ad_pages(self, soup):
tag = soup.find(attrs={'class': 'rodzaj'})
if tag and tag.a.string.lower().strip() == 'recenzje':
link = soup.find(text=re.compile('recenzuje'))
link = soup.find(text=re.compile(r'recenzuje'))
if link:
return self.index_to_soup(link.parent['href'], raw=True)

View File

@ -21,7 +21,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
remove_images = False
def get_cover_url(self):
"""Récupère dynamiquement l'URL de la dernière une depuis MLP"""
'''Récupère dynamiquement l'URL de la dernière une depuis MLP'''
br = self.get_browser()
try:
# Accéder à la page du magazine sur MLP
@ -38,7 +38,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
self.log('Cover URL found:', cover_url)
return cover_url
self.log('Aucune couverture trouvée, utilisation de l\'image par défaut')
self.log("Aucune couverture trouvée, utilisation de l'image par défaut")
return 'https://www.alternatives-economiques.fr/sites/all/themes/alternatives-economiques-main/assets/logo-alternatives-economiques.svg'
except Exception as e:
@ -92,7 +92,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
display_name = section_name.replace('-', ' ').title()
articles.append((display_name, feed_articles[:self.max_articles_per_feed]))
except Exception as e:
self.log.error(f'Error processing {section_name}: {str(e)}')
self.log.error(f'Error processing {section_name}: {e}')
continue
return articles
@ -133,7 +133,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
'description': ''
})
except Exception as e:
self.log.error(f'Error getting H1 title for {article_url}: {str(e)}')
self.log.error(f'Error getting H1 title for {article_url}: {e}')
continue
return feed_articles

View File

@ -21,11 +21,9 @@ class WwwAltomdata_dk(BasicNewsRecipe):
resolve_internal_links = True
remove_empty_feeds = True
auto_cleanup = True
language = 'da'
feeds = [
('Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/feed'),
('Kommentarer til Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/comments/feed'),
]

View File

@ -34,7 +34,7 @@ class AM730(BasicNewsRecipe):
description = 'http://www.am730.com.hk'
category = 'Chinese, News, Hong Kong'
masthead_url = 'https://upload.wikimedia.org/wikipedia/en/5/58/Am730_Hong_Kong_newspaper_logo.png'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa: E501
remove_tags =[dict(name='div',attrs={'class':'col-xs-12 col-sm-1 col-md-1 share-button'}),
dict(name='div',attrs={'class':'logo-container print-logo'}),
dict(name='div',attrs={'id':'galleria'})]
@ -53,12 +53,12 @@ class AM730(BasicNewsRecipe):
return self.masthead_url
def getAMSectionArticles(self, sectionName,url):
# print sectionName
# print(sectionName)
soup = self.index_to_soup(url)
articles = []
for aTag in soup.findAll('a',attrs={'class':'newsimglink'}):
href = aTag.get('href',False)
if not href.encode("utf-8").startswith(url.encode("utf-8")) :
if not href.encode('utf-8').startswith(url.encode('utf-8')):
continue # not in same section
title = href.split('/')[-1].split('-')[0]
@ -67,7 +67,7 @@ class AM730(BasicNewsRecipe):
print(title)
try:
if articles.index({'title':title,'url':href})>=0:
# print 'already added'
# print('already added')
continue # already added
except:
pass
@ -78,7 +78,7 @@ class AM730(BasicNewsRecipe):
break
if self.debug:
print(articles)
return (sectionName,articles)
return sectionName, articles
def parse_index(self):
# hard code sections
@ -91,10 +91,10 @@ class AM730(BasicNewsRecipe):
('旅遊.飲食','https://www.am730.com.hk/news/%E6%97%85%E9%81%8A.%E9%A3%B2%E9%A3%9F')
] # articles =[]
SectionsArticles=[]
for (title, url) in Sections:
for title, url in Sections:
if self.debug:
print(title)
print(url)
SectionsArticles.append(self.getAMSectionArticles(title,url))
# feeds.append(articles[0]['url'])
# feeds.append(articles[0]['url'])
return SectionsArticles

View File

@ -28,9 +28,9 @@ class Ambito(BasicNewsRecipe):
language = 'es_AR'
publication_type = 'newsportal'
masthead_url = 'https://www.ambito.com/css-custom/239/images/logo-239-2020v2.svg'
extra_css = """
extra_css = '''
body{font-family: Roboto, sans-serif}
"""
'''
conversion_options = {
'comment': description,

View File

@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AmericanThinker(BasicNewsRecipe):
title = u'American Thinker'
description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans."
description = 'American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans.'
__author__ = 'Walt Anthony'
publisher = 'Thomas Lifson'
category = 'news, politics, USA'
@ -33,7 +33,7 @@ class AmericanThinker(BasicNewsRecipe):
root = html5lib.parse(
clean_xml_chars(raw), treebuilder='lxml',
namespaceHTMLElements=False)
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa: E501
x.getparent().remove(x)
return etree.tostring(root, encoding='unicode')

View File

@ -39,4 +39,4 @@ class anan(BasicNewsRecipe):
def print_version(self, url):
# return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update
return url.replace("/show/", "/print/") # 2014-02-27 AGE: update
return url.replace('/show/', '/print/') # 2014-02-27 AGE: update

View File

@ -12,7 +12,7 @@ class ancientegypt(BasicNewsRecipe):
language = 'en'
__author__ = 'unkn0wn'
description = (
'Ancient Egypt is the world\'s leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. '
"Ancient Egypt is the world's leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. "
'Now in a larger format with a fresh new design, AE brings you the latest news and discoveries, and feature articles covering '
'more than 5000 years of Egyptian history. Published bimonthly.'
)

View File

@ -18,6 +18,7 @@ today = today.replace('/', '%2F')
index = 'https://epaper.andhrajyothy.com'
class andhra(BasicNewsRecipe):
title = 'ఆంధ్రజ్యోతి - ఆంధ్రప్రదేశ్'
language = 'te'
@ -75,8 +76,8 @@ class andhra(BasicNewsRecipe):
url = str(snaps['OrgId'])
if snaps['ObjectType'] == 4:
continue
feeds_dict[section].append({"title": '', "url": url})
return [(section, articles) for section, articles in feeds_dict.items()]
feeds_dict[section].append({'title': '', 'url': url})
return list(feeds_dict.items())
def preprocess_raw_html(self, raw, *a):
data = json.loads(raw)

View File

@ -18,6 +18,7 @@ today = today.replace('/', '%2F')
index = 'https://epaper.andhrajyothy.com'
class andhra(BasicNewsRecipe):
title = 'ఆంధ్రజ్యోతి - తెలంగాణ'
language = 'te'
@ -75,8 +76,8 @@ class andhra(BasicNewsRecipe):
url = str(snaps['OrgId'])
if snaps['ObjectType'] == 4:
continue
feeds_dict[section].append({"title": '', "url": url})
return [(section, articles) for section, articles in feeds_dict.items()]
feeds_dict[section].append({'title': '', 'url': url})
return list(feeds_dict.items())
def preprocess_raw_html(self, raw, *a):
data = json.loads(raw)

View File

@ -16,5 +16,5 @@ class Android_com_pl(BasicNewsRecipe):
remove_tags_after = [{'class': 'post-content'}]
remove_tags = [dict(name='ul', attrs={'class': 'tags small-tags'}), dict(name='a', attrs={'onclick': 'return ss_plugin_loadpopup_js(this);'})]
preprocess_regexps = [
(re.compile(u'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
(re.compile(r'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
feeds = [(u'Android', u'http://android.com.pl/feed/')]

View File

@ -23,36 +23,36 @@ class AdvancedUserRecipe1718384518(BasicNewsRecipe):
auto_cleanup = True
feeds = [
#Phones
# Phones
('Phones', 'https://www.androidpolice.com/feed/phones/'),
('News about Phones', 'https://www.androidpolice.com/feed/phones-news/'),
('Guides about Phones', 'https://www.androidpolice.com/feed/phones-guide/'),
('Phones Features', 'https://www.androidpolice.com/feed/phones-features/'),
('Phones & Accessory Reviews', 'https://www.androidpolice.com/feed/phones-reviews/'),
#Google
# Google
('Google', 'https://www.androidpolice.com/feed/google/'),
('News about Google', 'https://www.androidpolice.com/feed/news-google/'),
('Google Applications', 'https://www.androidpolice.com/feed/tag/google-app/'),
('Guides about Google', 'https://www.androidpolice.com/feed/guides-google/'),
('Features about Google', 'https://www.androidpolice.com/feed/features-google/'),
#Operating Systems
# Operating Systems
('Operating Systems', 'https://www.androidpolice.com/feed/operating-systems/'),
('News about Operating Systems', 'https://www.androidpolice.com/feed/news-operating-systems/'),
('Guides about Operating Systems', 'https://www.androidpolice.com/feed/guides-operating-systems/'),
('Features on Operating Systems', 'https://www.androidpolice.com/feed/features-operating-systems/'),
#Chromebooks
# Chromebooks
('Chromebooks', 'https://www.androidpolice.com/feed/laptops/'),
('News about Chromebooks', 'https://www.androidpolice.com/feed/news-chromebooks/'),
('Guides about Chromebooks', 'https://www.androidpolice.com/feed/guides-chromebooks/'),
('Chromebook & Laptop Reviews', 'https://www.androidpolice.com/feed/reviews-chromebooks/'),
#Gadgets
# Gadgets
('Gadgets', 'https://www.androidpolice.com/feed/gadgets/'),
('Smartwatches & Wearables', 'https://www.androidpolice.com/feed/wearables/'),
('Audio', 'https://www.androidpolice.com/feed/tag/audio/'),
('Accessories', 'https://www.androidpolice.com/feed/accessories/'),
('Smart Home', 'https://www.androidpolice.com/feed/smart-home/'),
('Applications & Games', 'https://www.androidpolice.com/feed/applications-games/'),
#Reviews
# Reviews
('Reviews', 'https://www.androidpolice.com/feed/reviews/'),
('Phones & Accessory Reviews', 'https://www.androidpolice.com/feed/phones-reviews/'),
('Smartwatch & Wearable Reviews', 'https://www.androidpolice.com/feed/wearable-reviews/'),

View File

@ -32,13 +32,10 @@ class AnimalPolitico(BasicNewsRecipe):
def parse_index(self):
soup = self.index_to_soup('http://www.animalpolitico.com/')
articles = []
for a in soup(**{
'name': 'a',
'attrs': {
for a in soup(name='a', attrs={
'href': True, 'title': True,
'data-author': True, 'data-type': True,
'data-home-title': True
}
}):
title = a['title']
url = a['href']

View File

@ -19,8 +19,6 @@ class AmericanProspect(BasicNewsRecipe):
use_embedded_content = False
no_stylesheets = True
keep_only_tags = [
dict(id=['title', 'content']),
]

View File

@ -18,8 +18,6 @@ class Arbetaren_SE(BasicNewsRecipe):
encoding = 'utf-8'
language = 'sv'
auto_cleanup = True
auto_cleanup_keep = '//div[@class="thumbnail"]'
auto_cleanup_keep = '//div[@id="article-image"]'
auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]'
auto_cleanup_keep = '//div[@class="thumbnail"]|//div[@id="article-image"]|//span[@class="important"]'
feeds = [(u'Nyheter', u'https://www.arbetaren.se/feed')]

View File

@ -66,19 +66,19 @@ class Arcamax(BasicNewsRecipe):
# (u"9 Chickweed Lane", u"https://www.arcamax.com/thefunnies/ninechickweedlane"),
# (u"Agnes", u"https://www.arcamax.com/thefunnies/agnes"),
# (u"Andy Capp", u"https://www.arcamax.com/thefunnies/andycapp"),
(u"BC", u"https://www.arcamax.com/thefunnies/bc"),
(u'BC', u'https://www.arcamax.com/thefunnies/bc'),
# (u"Baby Blues", u"https://www.arcamax.com/thefunnies/babyblues"),
# (u"Beetle Bailey", u"https://www.arcamax.com/thefunnies/beetlebailey"),
(u"Blondie", u"https://www.arcamax.com/thefunnies/blondie"),
(u'Blondie', u'https://www.arcamax.com/thefunnies/blondie'),
# u"Boondocks", u"https://www.arcamax.com/thefunnies/boondocks"),
# (u"Cathy", u"https://www.arcamax.com/thefunnies/cathy"),
# (u"Daddys Home", u"https://www.arcamax.com/thefunnies/daddyshome"),
# (u"Dinette Set", u"https://www.arcamax.com/thefunnies/thedinetteset"),
(u"Dog Eat Doug", u"https://www.arcamax.com/thefunnies/dogeatdoug"),
(u'Dog Eat Doug', u'https://www.arcamax.com/thefunnies/dogeatdoug'),
# (u"Doonesbury", u"https://www.arcamax.com/thefunnies/doonesbury"),
# (u"Dustin", u"https://www.arcamax.com/thefunnies/dustin"),
(u"Family Circus", u"https://www.arcamax.com/thefunnies/familycircus"),
(u"Garfield", u"https://www.arcamax.com/thefunnies/garfield"),
(u'Family Circus', u'https://www.arcamax.com/thefunnies/familycircus'),
(u'Garfield', u'https://www.arcamax.com/thefunnies/garfield'),
# (u"Get Fuzzy", u"https://www.arcamax.com/thefunnies/getfuzzy"),
# (u"Girls and Sports", u"https://www.arcamax.com/thefunnies/girlsandsports"),
# (u"Hagar the Horrible", u"https://www.arcamax.com/thefunnies/hagarthehorrible"),
@ -87,16 +87,16 @@ class Arcamax(BasicNewsRecipe):
# (u"Luann", u"https://www.arcamax.com/thefunnies/luann"),
# (u"Momma", u"https://www.arcamax.com/thefunnies/momma"),
# (u"Mother Goose and Grimm", u"https://www.arcamax.com/thefunnies/mothergooseandgrimm"),
(u"Mutts", u"https://www.arcamax.com/thefunnies/mutts"),
(u'Mutts', u'https://www.arcamax.com/thefunnies/mutts'),
# (u"Non Sequitur", u"https://www.arcamax.com/thefunnies/nonsequitur"),
# (u"Pearls Before Swine", u"https://www.arcamax.com/thefunnies/pearlsbeforeswine"),
# (u"Pickles", u"https://www.arcamax.com/thefunnies/pickles"),
# (u"Red and Rover", u"https://www.arcamax.com/thefunnies/redandrover"),
# (u"Rubes", u"https://www.arcamax.com/thefunnies/rubes"),
# (u"Rugrats", u"https://www.arcamax.com/thefunnies/rugrats"),
(u"Speed Bump", u"https://www.arcamax.com/thefunnies/speedbump"),
(u"Wizard of Id", u"https://www.arcamax.com/thefunnies/wizardofid"),
(u"Zits", u"https://www.arcamax.com/thefunnies/zits"),
(u'Speed Bump', u'https://www.arcamax.com/thefunnies/speedbump'),
(u'Wizard of Id', u'https://www.arcamax.com/thefunnies/wizardofid'),
(u'Zits', u'https://www.arcamax.com/thefunnies/zits'),
]:
self.log('Finding strips for:', title)
articles = self.make_links(url, title)

View File

@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class ArretSurImages(BasicNewsRecipe):
title = 'Arrêt sur Images'
description = 'Site français d\'analyse des médias'
description = "Site français d'analyse des médias"
language = 'fr'
encoding = 'utf-8'
needs_subscription = True
@ -27,9 +27,9 @@ class ArretSurImages(BasicNewsRecipe):
]
def default_cover(self, cover_file):
"""
'''
Crée une couverture personnalisée avec le logo ASI
"""
'''
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
@ -45,7 +45,7 @@ class ArretSurImages(BasicNewsRecipe):
weekday = french_weekday[wkd]
month = french_month[today.month]
date_str = f"{weekday} {today.day} {month} {today.year}"
date_str = f'{weekday} {today.day} {month} {today.year}'
edition = today.strftime('Édition de %Hh')
img = QImage(1400, 1920, QImage.Format_RGB888)
@ -123,9 +123,9 @@ class ArretSurImages(BasicNewsRecipe):
br.addheaders += [('Authorization', f'Bearer {auth_response["access_token"]}')]
print('Authentification réussie')
else:
print('Échec de l\'authentification - Vérifiez vos identifiants')
print("Échec de l'authentification - Vérifiez vos identifiants")
except Exception as e:
print(f'Erreur lors de l\'authentification: {str(e)}')
print(f"Erreur lors de l'authentification: {e}")
return br
def get_article_url(self, article):
@ -162,7 +162,7 @@ class ArretSurImages(BasicNewsRecipe):
</html>
'''
except Exception as e:
print(f'Erreur preprocessing HTML: {str(e)}')
print(f'Erreur preprocessing HTML: {e}')
return raw_html
def preprocess_html(self, soup):
@ -186,11 +186,11 @@ class ArretSurImages(BasicNewsRecipe):
else:
tag.replace_with(img_tag)
except Exception as e:
print(f'Erreur processing image: {str(e)}')
print(f'Erreur processing image: {e}')
tag.decompose()
else:
tag.decompose()
return soup
except Exception as e:
print(f'Erreur preprocessing HTML: {str(e)}')
print(f'Erreur preprocessing HTML: {e}')
return soup

View File

@ -1,12 +1,12 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = "GPL v3"
__copyright__ = "2022, Albert Aparicio Isarn <aaparicio at posteo.net>"
__license__ = 'GPL v3'
__copyright__ = '2022, Albert Aparicio Isarn <aaparicio at posteo.net>'
"""
'''
https://www.asahi.com/ajw/
"""
'''
from datetime import datetime
@ -14,99 +14,99 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AsahiShimbunEnglishNews(BasicNewsRecipe):
title = "The Asahi Shimbun"
__author__ = "Albert Aparicio Isarn"
title = 'The Asahi Shimbun'
__author__ = 'Albert Aparicio Isarn'
description = ("The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan."
" The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive"
" coverage of cool Japan,focusing on manga, travel and other timely news.")
publisher = "The Asahi Shimbun Company"
publication_type = "newspaper"
category = "news, japan"
language = "en_JP"
description = ('The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan.'
' The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive'
' coverage of cool Japan,focusing on manga, travel and other timely news.')
publisher = 'The Asahi Shimbun Company'
publication_type = 'newspaper'
category = 'news, japan'
language = 'en_JP'
index = "https://www.asahi.com"
masthead_url = "https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png"
index = 'https://www.asahi.com'
masthead_url = 'https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png'
oldest_article = 3
max_articles_per_feed = 40
no_stylesheets = True
remove_javascript = True
remove_tags_before = {"id": "MainInner"}
remove_tags_after = {"class": "ArticleText"}
remove_tags = [{"name": "div", "class": "SnsUtilityArea"}]
remove_tags_before = {'id': 'MainInner'}
remove_tags_after = {'class': 'ArticleText'}
remove_tags = [{'name': 'div', 'class': 'SnsUtilityArea'}]
def get_whats_new(self):
soup = self.index_to_soup(self.index + "/ajw/new")
news_section = soup.find("div", attrs={"class": "specialList"})
soup = self.index_to_soup(self.index + '/ajw/new')
news_section = soup.find('div', attrs={'class': 'specialList'})
new_news = []
for item in news_section.findAll("li"):
title = item.find("p", attrs={"class": "title"}).string
date_string = item.find("p", attrs={"class": "date"}).next
for item in news_section.findAll('li'):
title = item.find('p', attrs={'class': 'title'}).string
date_string = item.find('p', attrs={'class': 'date'}).next
date = date_string.strip()
url = self.index + item.find("a")["href"]
url = self.index + item.find('a')['href']
new_news.append(
{
"title": title,
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
"url": url,
"description": "",
'title': title,
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
'url': url,
'description': '',
}
)
return new_news
def get_top6(self, soup):
top = soup.find("ul", attrs={"class": "top6"})
top = soup.find('ul', attrs={'class': 'top6'})
top6_news = []
for item in top.findAll("li"):
title = item.find("p", attrs={"class": "title"}).string
date_string = item.find("p", attrs={"class": "date"}).next
for item in top.findAll('li'):
title = item.find('p', attrs={'class': 'title'}).string
date_string = item.find('p', attrs={'class': 'date'}).next
date = date_string.strip()
url = self.index + item.find("a")["href"]
url = self.index + item.find('a')['href']
top6_news.append(
{
"title": title,
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
"url": url,
"description": "",
'title': title,
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
'url': url,
'description': '',
}
)
return top6_news
def get_section_news(self, soup):
news_grid = soup.find("ul", attrs={"class": "default"})
news_grid = soup.find('ul', attrs={'class': 'default'})
news = []
for item in news_grid.findAll("li"):
title = item.find("p", attrs={"class": "title"}).string
date_string = item.find("p", attrs={"class": "date"}).next
for item in news_grid.findAll('li'):
title = item.find('p', attrs={'class': 'title'}).string
date_string = item.find('p', attrs={'class': 'date'}).next
date = date_string.strip()
url = self.index + item.find("a")["href"]
url = self.index + item.find('a')['href']
news.append(
{
"title": title,
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
"url": url,
"description": "",
'title': title,
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
'url': url,
'description': '',
}
)
return news
def get_section(self, section):
soup = self.index_to_soup(self.index + "/ajw/" + section)
soup = self.index_to_soup(self.index + '/ajw/' + section)
section_news_items = self.get_top6(soup)
section_news_items.extend(self.get_section_news(soup))
@ -114,26 +114,26 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
return section_news_items
def get_special_section(self, section):
soup = self.index_to_soup(self.index + "/ajw/" + section)
top = soup.find("div", attrs={"class": "Section"})
soup = self.index_to_soup(self.index + '/ajw/' + section)
top = soup.find('div', attrs={'class': 'Section'})
special_news = []
for item in top.findAll("li"):
item_a = item.find("a")
for item in top.findAll('li'):
item_a = item.find('a')
text_split = item_a.text.strip().split("\n")
text_split = item_a.text.strip().split('\n')
title = text_split[0]
description = text_split[1].strip()
url = self.index + item_a["href"]
url = self.index + item_a['href']
special_news.append(
{
"title": title,
"date": "",
"url": url,
"description": description,
'title': title,
'date': '',
'url': url,
'description': description,
}
)
@ -144,24 +144,24 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
feeds = [
("What's New", self.get_whats_new()),
("National Report", self.get_section("national_report")),
("Politics", self.get_section("politics")),
("Business", self.get_section("business")),
("Asia & World - China", self.get_section("asia_world/china")),
("Asia & World - Korean Peninsula", self.get_section("asia_world/korean_peninsula")),
("Asia & World - Around Asia", self.get_section("asia_world/around_asia")),
("Asia & World - World", self.get_section("asia_world/world")),
("Sci & Tech", self.get_section("sci_tech")),
("Culture - Style", self.get_section("culture/style")),
('National Report', self.get_section('national_report')),
('Politics', self.get_section('politics')),
('Business', self.get_section('business')),
('Asia & World - China', self.get_section('asia_world/china')),
('Asia & World - Korean Peninsula', self.get_section('asia_world/korean_peninsula')),
('Asia & World - Around Asia', self.get_section('asia_world/around_asia')),
('Asia & World - World', self.get_section('asia_world/world')),
('Sci & Tech', self.get_section('sci_tech')),
('Culture - Style', self.get_section('culture/style')),
# ("Culture - Cooking", self.get_section("culture/cooking")),
("Culture - Movies", self.get_section("culture/movies")),
("Culture - Manga & Anime", self.get_section("culture/manga_anime")),
("Travel", self.get_section("travel")),
("Sports", self.get_section("sports")),
("Opinion - Editorial", self.get_section("opinion/editorial")),
("Opinion - Vox Populi", self.get_section("opinion/vox")),
("Opinion - Views", self.get_section("opinion/views")),
("Special", self.get_special_section("special")),
('Culture - Movies', self.get_section('culture/movies')),
('Culture - Manga & Anime', self.get_section('culture/manga_anime')),
('Travel', self.get_section('travel')),
('Sports', self.get_section('sports')),
('Opinion - Editorial', self.get_section('opinion/editorial')),
('Opinion - Vox Populi', self.get_section('opinion/vox')),
('Opinion - Views', self.get_section('opinion/views')),
('Special', self.get_special_section('special')),
]
return feeds

View File

@ -14,7 +14,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AsianReviewOfBooks(BasicNewsRecipe):
title = 'The Asian Review of Books'
__author__ = 'Darko Miletic'
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.' # noqa
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.' # noqa: E501
publisher = 'The Asian Review of Books'
category = 'literature, books, reviews, Asia'
oldest_article = 30
@ -26,11 +26,11 @@ class AsianReviewOfBooks(BasicNewsRecipe):
publication_type = 'magazine'
auto_cleanup = True
masthead_url = 'https://i2.wp.com/asianreviewofbooks.com/content/wp-content/uploads/2016/09/ARBwidelogo.png'
extra_css = """
extra_css = '''
body{font-family: "Droid Serif", serif}
.entry-title {font-family: "Playfair Display", serif}
img {display: block}
"""
'''
recipe_specific_options = {
'days': {

View File

@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AstroNEWS(BasicNewsRecipe):
title = u'AstroNEWS'
__author__ = 'fenuks'
description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' # noqa
description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' # noqa: E501
category = 'astronomy, science'
language = 'pl'
oldest_article = 8

View File

@ -12,7 +12,7 @@ test_article = None
# test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed'
# {{{ parse article JSON
# parse article JSON {{{
def process_image_block(lines, block):
caption = block.get('captionText')
caption_lines = []

View File

@ -12,7 +12,7 @@ test_article = None
# test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed'
# {{{ parse article JSON
# parse article JSON {{{
def process_image_block(lines, block):
caption = block.get('captionText')
caption_lines = []

View File

@ -11,7 +11,7 @@ class AttacEspanaRecipe (BasicNewsRecipe):
__license__ = 'GPL v3'
__copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
title = u'attac.es'
description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' # noqa
description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' # noqa: E501
url = 'http://www.attac.es'
language = 'es'
tags = 'contrainformación, información alternativa'

View File

@ -24,4 +24,3 @@ class WwwAvisen_dk(BasicNewsRecipe):
feeds = [
('Nyheder fra Avisen.dk', 'http://www.avisen.dk/rss.aspx'),
]

View File

@ -24,12 +24,12 @@ class BuenosAiresHerald(BasicNewsRecipe):
publication_type = 'newspaper'
masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg'
INDEX = 'http://www.buenosairesherald.com'
extra_css = """
extra_css = '''
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
h1{font-family: Georgia,serif}
#fecha{text-align: right; font-size: small}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@ -22,21 +22,21 @@ class AdvancedUserRecipe1718382046(BasicNewsRecipe):
auto_cleanup = True
feeds = [
#Gardening
# Gardening
('Gardening', 'https://www.backyardboss.net/feed/category/gardening/'),
('Outdoor Gardening', 'https://www.backyardboss.net/feed/category/gardening/outdoor-gardening/'),
('Indoor Gardening', 'https://www.backyardboss.net/feed/category/gardening/indoor-gardening/'),
('Fruits & Vegetables', 'https://www.backyardboss.net/feed/tag/gardening/fruits-and-vegetables/'),
('Houseplants', 'https://www.backyardboss.net/feed/category/gardening/houseplants/'),
('Plant Care', 'https://www.backyardboss.net/feed/category/gardening/plant-care/'),
#Backyard
# Backyard
('Backyard', 'https://www.backyardboss.net/feed/category/backyard/'),
('Home Improvement', 'https://www.backyardboss.net/feed/category/backyard/home-improvement/'),
('Lawn Care', 'https://www.backyardboss.net/feed/category/backyard/lawn-care/'),
('Landscaping', 'https://www.backyardboss.net/feed/category/backyard/landscape-industry/'),
('Barbecue', 'https://www.backyardboss.net/feed/category/backyard/bbq/'),
('Reviews', 'https://www.backyardboss.net/feed/category/backyard/reviews/'),
#DIY & Project
# DIY & Project
('DIY & Projects', 'https://www.backyardboss.net/feed/category/diy/'),
('How-To', 'https://www.backyardboss.net/feed/category/diy/how-to/'),
('Designs & Ideas', 'https://www.backyardboss.net/feed/category/diy/designs-and-ideas/'),

View File

@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
class BaikalJournal(BasicNewsRecipe):
title = '\u041B\u044E\u0434\u0438 \u0411\u0430\u0439\u043A\u0430\u043B\u0430'
__author__ = 'bugmen00t'
description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u044B\u0439 \u0430\u0432\u0442\u043E\u0440\u0441\u043A\u0438\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0436\u0443\u0440\u043D\u0430\u043B \u043E \u0436\u0438\u0437\u043D\u0438 \u0432 \u0421\u0438\u0431\u0438\u0440\u0438 \u0438 \u0432\u043E\u043A\u0440\u0443\u0433 \u0411\u0430\u0439\u043A\u0430\u043B\u0430, \u043E\u043F\u0438\u0441\u044B\u0432\u0430\u044E\u0449\u0438\u0439 \u0436\u0438\u0437\u043D\u044C \u0432 \u0441\u0438\u0431\u0438\u0440\u0441\u043A\u043E\u0439 \u0433\u043B\u0443\u0431\u0438\u043D\u043A\u0435.' # noqa
publisher = '\u041E\u043B\u044C\u0433\u0430 \u041C\u0443\u0442\u043E\u0432\u0438\u043D\u0430, \u0415\u043B\u0435\u043D\u0430 \u0422\u0440\u0438\u0444\u043E\u043D\u043E\u0432\u0430' # noqa
description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u044B\u0439 \u0430\u0432\u0442\u043E\u0440\u0441\u043A\u0438\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0436\u0443\u0440\u043D\u0430\u043B \u043E \u0436\u0438\u0437\u043D\u0438 \u0432 \u0421\u0438\u0431\u0438\u0440\u0438 \u0438 \u0432\u043E\u043A\u0440\u0443\u0433 \u0411\u0430\u0439\u043A\u0430\u043B\u0430, \u043E\u043F\u0438\u0441\u044B\u0432\u0430\u044E\u0449\u0438\u0439 \u0436\u0438\u0437\u043D\u044C \u0432 \u0441\u0438\u0431\u0438\u0440\u0441\u043A\u043E\u0439 \u0433\u043B\u0443\u0431\u0438\u043D\u043A\u0435.' # noqa: E501
publisher = '\u041E\u043B\u044C\u0433\u0430 \u041C\u0443\u0442\u043E\u0432\u0438\u043D\u0430, \u0415\u043B\u0435\u043D\u0430 \u0422\u0440\u0438\u0444\u043E\u043D\u043E\u0432\u0430' # noqa: E501
category = 'blog'
cover_url = u'https://baikal-journal.ru/wp-content/themes/baikal/assets/img/logo-full.svg'
language = 'ru'

View File

@ -16,7 +16,7 @@ class BangkokPostRecipe(BasicNewsRecipe):
title = u'Bangkok Post'
publisher = u'Post Publishing PCL'
category = u'News'
description = u'The world\'s window to Thailand'
description = u"The world's window to Thailand"
oldest_article = 7
max_articles_per_feed = 100

View File

@ -26,7 +26,7 @@ class bar(BasicNewsRecipe):
prefixed_classes(
'text-story-m_story-tags__ story-footer-module__metype__'
),
dict(name = 'svg')
dict(name='svg')
]
def preprocess_html(self, soup):

View File

@ -8,11 +8,11 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes
class barrons(BasicNewsRecipe):
title = 'Barron\'s Magazine'
title = "Barron's Magazine"
__author__ = 'unkn0wn'
description = (
'Barron\'s is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister '
'publication to The Wall Street Journal, Barron\'s covers U.S. financial information, market developments, and '
"Barron's is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister "
"publication to The Wall Street Journal, Barron's covers U.S. financial information, market developments, and "
'relevant statistics.'
)
language = 'en_US'
@ -82,7 +82,7 @@ class barrons(BasicNewsRecipe):
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (YYYYMMDD format)',
'long': 'For example, 20240722.\nIf it didn\'t work, try again later.'
'long': "For example, 20240722.\nIf it didn't work, try again later."
}
}
@ -127,7 +127,7 @@ class barrons(BasicNewsRecipe):
desc += ' | ' + self.tag_to_string(summ)
self.log('\t', title, ' ', url, '\n\t', desc)
ans[section].append({'title': title, 'url': url, 'description': desc})
return [(section, articles) for section, articles in ans.items()]
return list(ans.items())
def print_version(self, url):
return url.split('?')[0].replace('/articles/', '/amp/articles/')

View File

@ -135,9 +135,9 @@ class BBCNews(BasicNewsRecipe):
# Select / de-select the feeds you want in your ebook.
feeds = [
("News Home", "https://feeds.bbci.co.uk/news/rss.xml"),
("UK", "https://feeds.bbci.co.uk/news/uk/rss.xml"),
("World", "https://feeds.bbci.co.uk/news/world/rss.xml"),
('News Home', 'https://feeds.bbci.co.uk/news/rss.xml'),
('UK', 'https://feeds.bbci.co.uk/news/uk/rss.xml'),
('World', 'https://feeds.bbci.co.uk/news/world/rss.xml'),
# ("England", "https://feeds.bbci.co.uk/news/england/rss.xml"),
# ("Scotland", "https://feeds.bbci.co.uk/news/scotland/rss.xml"),
# ("Wales", "https://feeds.bbci.co.uk/news/wales/rss.xml"),
@ -147,26 +147,26 @@ class BBCNews(BasicNewsRecipe):
# ("Europe", "https://feeds.bbci.co.uk/news/world/europe/rss.xml"),
# ("Latin America", "https://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
# ("Middle East", "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
("US & Canada", "https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"),
("Politics", "https://feeds.bbci.co.uk/news/politics/rss.xml"),
("Science/Environment",
"https://feeds.bbci.co.uk/news/science_and_environment/rss.xml"),
("Technology", "https://feeds.bbci.co.uk/news/technology/rss.xml"),
("Magazine", "https://feeds.bbci.co.uk/news/magazine/rss.xml"),
("Entertainment/Arts",
"https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"),
('US & Canada', 'https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml'),
('Politics', 'https://feeds.bbci.co.uk/news/politics/rss.xml'),
('Science/Environment',
'https://feeds.bbci.co.uk/news/science_and_environment/rss.xml'),
('Technology', 'https://feeds.bbci.co.uk/news/technology/rss.xml'),
('Magazine', 'https://feeds.bbci.co.uk/news/magazine/rss.xml'),
('Entertainment/Arts',
'https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml'),
# ("Health", "https://feeds.bbci.co.uk/news/health/rss.xml"),
# ("Education/Family", "https://feeds.bbci.co.uk/news/education/rss.xml"),
("Business", "https://feeds.bbci.co.uk/news/business/rss.xml"),
("Special Reports", "https://feeds.bbci.co.uk/news/special_reports/rss.xml"),
("Also in the News", "https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"),
('Business', 'https://feeds.bbci.co.uk/news/business/rss.xml'),
('Special Reports', 'https://feeds.bbci.co.uk/news/special_reports/rss.xml'),
('Also in the News', 'https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml'),
# ("Newsbeat", "https://www.bbc.co.uk/newsbeat/rss.xml"),
# ("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
# ("Blog: Mark D'Arcy (Parliamentary Correspondent)", "https://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
# ("Blog: Robert Peston (Business Editor)", "https://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
# ("Blog: Stephanie Flanders (Economics Editor)", "https://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
("Sport Front Page",
"http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"),
('Sport Front Page',
'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml'),
# ("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
# ("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
# ("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),

View File

@ -201,24 +201,24 @@ class BBCBrasilRecipe(BasicNewsRecipe):
conversion_options = {'smarten_punctuation': True}
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
.introduction, .first { font-weight: bold; } \
.cross-head { font-weight: bold; font-size: 125%; } \
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
.cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
.correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block; \
text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \
.story-date, .published, .datestamp { font-size: 80%; } \
table { width: 100%; } \
td img { display: block; margin: 5px auto; } \
ul { padding-top: 10px; } \
ol { padding-top: 10px; } \
li { padding-top: 5px; padding-bottom: 5px; } \
h1 { text-align: center; font-size: 175%; font-weight: bold; } \
h2 { text-align: center; font-size: 150%; font-weight: bold; } \
h3 { text-align: center; font-size: 125%; font-weight: bold; } \
h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
extra_css = '''body { font-family: verdana, helvetica, sans-serif; }
.introduction, .first { font-weight: bold; }
.cross-head { font-weight: bold; font-size: 125%; }
.cap, .caption { display: block; font-size: 80%; font-style: italic; }
.cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; }
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position,
.correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block;
text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; }
.story-date, .published, .datestamp { font-size: 80%; }
table { width: 100%; }
td img { display: block; margin: 5px auto; }
ul { padding-top: 10px; }
ol { padding-top: 10px; }
li { padding-top: 5px; padding-bottom: 5px; }
h1 { text-align: center; font-size: 175%; font-weight: bold; }
h2 { text-align: center; font-size: 150%; font-weight: bold; }
h3 { text-align: center; font-size: 125%; font-weight: bold; }
h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'''
# Remove various tag attributes to improve the look of the ebook pages.
remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan',
@ -556,19 +556,19 @@ class BBCBrasilRecipe(BasicNewsRecipe):
def print_version(self, url):
# Handle sports page urls type 01:
if (url.find("go/rss/-/sport1/") != -1):
temp_url = url.replace("go/rss/-/", "")
if (url.find('go/rss/-/sport1/') != -1):
temp_url = url.replace('go/rss/-/', '')
# Handle sports page urls type 02:
elif (url.find("go/rss/int/news/-/sport1/") != -1):
temp_url = url.replace("go/rss/int/news/-/", "")
elif (url.find('go/rss/int/news/-/sport1/') != -1):
temp_url = url.replace('go/rss/int/news/-/', '')
# Handle regular news page urls:
else:
temp_url = url.replace("go/rss/int/news/-/", "")
temp_url = url.replace('go/rss/int/news/-/', '')
# Always add "?print=true" to the end of the url.
print_url = temp_url + "?print=true"
print_url = temp_url + '?print=true'
return print_url

View File

@ -36,7 +36,7 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
remove_tags_after = dict(name='div', attrs={'class': [' g-w8']})
remove_tags = [
dict(name='ul', attrs={'class': ['document-tools blq-clearfix', 'blq-clearfix']}), dict(name='div', attrs={'class': ['box bx-quote-bubble', 'socialmedia-links', 'list li-carousel', 'list li-plain rolling-news', 'list li-plain', 'box bx-livestats', 'li-tab content', 'list li-relatedlinks', 'list li-relatedinternetlinks']}) # noqa
dict(name='ul', attrs={'class': ['document-tools blq-clearfix', 'blq-clearfix']}), dict(name='div', attrs={'class': ['box bx-quote-bubble', 'socialmedia-links', 'list li-carousel', 'list li-plain rolling-news', 'list li-plain', 'box bx-livestats', 'li-tab content', 'list li-relatedlinks', 'list li-relatedinternetlinks']}) # noqa: E501
]
feeds = [

View File

@ -134,7 +134,7 @@ if __name__ == '__main__':
class BBC(BasicNewsRecipe):
title = 'BBC News (fast)'
__author__ = 'Kovid Goyal'
description = 'Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.' # noqa
description = 'Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.' # noqa: E501
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True

View File

@ -21,7 +21,7 @@ class BBC(BasicNewsRecipe):
category = 'sport, news, UK, world'
language = 'en_GB'
publication_type = 'newsportal'
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa: E501
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
conversion_options = {
'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True

View File

@ -1,5 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
@ -9,10 +9,10 @@ from calibre.web.feeds.news import BasicNewsRecipe
class BeforeWeGo(BasicNewsRecipe):
title = 'Before We Go'
__author__ = 'bugmen00t'
description = 'Before We Go Blog is a collective of Fantasy, Sci-Fi and Graphic Novel fans from around the world, passionate about providing awesome, enjoyable reviews for anyone who loves a good book!' # noqa
description = 'Before We Go Blog is a collective of Fantasy, Sci-Fi and Graphic Novel fans from around the world, passionate about providing awesome, enjoyable reviews for anyone who loves a good book!' # noqa: E501
publisher = 'BEFOREWEGOBLOG'
category = 'blog'
# cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/09/beth-with-long-hair.jpeg'
# cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/09/beth-with-long-hair.jpeg'
cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/01/before-we-go-blog-1.png'
language = 'en'
no_stylesheets = True
@ -24,7 +24,7 @@ class BeforeWeGo(BasicNewsRecipe):
remove_tags_before = dict(name='h1', attrs={'class': 'entry-title'})
remove_tags_after = dict(name='div', attrs={'id': 'author-bio'})
# remove_tags_after = dict(name='article')
# remove_tags_after = dict(name='article')
remove_tags = [
dict(name='div', attrs={'class': 'nectar-scrolling-text font_size_10vh custom_color has-custom-divider'}),

View File

@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Bellingcat(BasicNewsRecipe):
title = 'Bellingcat'
__author__ = 'bugmen00t'
description = 'Bellingcat is an independent international collective of researchers, investigators and citizen journalists using open source and social media investigation to probe a variety of subjects from Mexican drug lords and crimes against humanity, to tracking the use of chemical weapons and conflicts worldwide. With staff and contributors in more than 20 countries around the world, we operate in a unique field where advanced technology, forensic research, journalism, investigations, transparency and accountability come together.' # noqa
description = 'Bellingcat is an independent international collective of researchers, investigators and citizen journalists using open source and social media investigation to probe a variety of subjects from Mexican drug lords and crimes against humanity, to tracking the use of chemical weapons and conflicts worldwide. With staff and contributors in more than 20 countries around the world, we operate in a unique field where advanced technology, forensic research, journalism, investigations, transparency and accountability come together.' # noqa: E501
publisher = 'Stichting Bellingcat'
category = 'blog'
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'

View File

@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Bellingcat(BasicNewsRecipe):
title = 'Bellingcat'
__author__ = 'bugmen00t'
description = 'Bellingcat es un colectivo internacional independiente de investigadores y periodistas ciudadanos que usan informaci\u00F3n de fuente abierta y redes sociales para investigar una gran variedad de temas, desde carteles de droga en M\u00E9xico y cr\u00EDmenes de lesa humanidad hasta el rastreo de armas qu\u00EDmicas en zonas de conflicto alrededor del mundo. Nosotros operamos en un campo \u00FAnico donde la tecnolog\u00EDa avanzada, las investigaciones forenses, el periodismo, y la transparencia y responsabilidad se unen.' # noqa
description = 'Bellingcat es un colectivo internacional independiente de investigadores y periodistas ciudadanos que usan informaci\u00F3n de fuente abierta y redes sociales para investigar una gran variedad de temas, desde carteles de droga en M\u00E9xico y cr\u00EDmenes de lesa humanidad hasta el rastreo de armas qu\u00EDmicas en zonas de conflicto alrededor del mundo. Nosotros operamos en un campo \u00FAnico donde la tecnolog\u00EDa avanzada, las investigaciones forenses, el periodismo, y la transparencia y responsabilidad se unen.' # noqa: E501
publisher = 'Stichting Bellingcat'
category = 'blog'
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'

View File

@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Bellingcat(BasicNewsRecipe):
title = 'Bellingcat'
__author__ = 'bugmen00t'
description = 'Bellingcat est un groupe international ind\u00E9pendant de chercheurs, d\u0027enqu\u00EAteurs et de journalistes citoyens utilisant \u00E0 la fois: enqu\u00EAtes \u0027open source\u0027 et r\u00E9seaux sociaux, pour sonder une vari\u00E9t\u00E9 de sujets - trafiquants de drogue mexicains, crimes contre l\u0027humanit\u00E9, suivi de l\u0027utilisation d\u0027armes chimiques et conflits dans le monde entier. Nous op\u00E9rons dans un domaine unique dans lequel technologie de pointe, recherche m\u00E9dico-l\u00E9gale, journalisme, enqu\u00EAtes, transparence et responsabilit\u00E9 se rencontrent.' # noqa
description = 'Bellingcat est un groupe international ind\u00E9pendant de chercheurs, d\u0027enqu\u00EAteurs et de journalistes citoyens utilisant \u00E0 la fois: enqu\u00EAtes \u0027open source\u0027 et r\u00E9seaux sociaux, pour sonder une vari\u00E9t\u00E9 de sujets - trafiquants de drogue mexicains, crimes contre l\u0027humanit\u00E9, suivi de l\u0027utilisation d\u0027armes chimiques et conflits dans le monde entier. Nous op\u00E9rons dans un domaine unique dans lequel technologie de pointe, recherche m\u00E9dico-l\u00E9gale, journalisme, enqu\u00EAtes, transparence et responsabilit\u00E9 se rencontrent.' # noqa: E501
publisher = 'Stichting Bellingcat'
category = 'blog'
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'

View File

@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Bellingcat(BasicNewsRecipe):
title = '\u0411\u0435\u043B\u043B\u0438\u043D\u0433\u043A\u044D\u0442 (bell\u00BFngcat)'
__author__ = 'bugmen00t'
description = 'Bellingcat \u2014 \u044D\u0442\u043E \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u0430\u044F \u043C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0430\u044F \u043A\u043E\u043C\u0430\u043D\u0434\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u0435\u0439 \u0438 \u0433\u0440\u0430\u0436\u0434\u0430\u043D\u0441\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u044E\u0442 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u0435 \u0438\u0441\u0442\u043E\u0447\u043D\u0438\u043A\u0438 \u0438 \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0441\u0435\u0442\u0438 \u0434\u043B\u044F \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0439 \u043D\u0430 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u2014 \u043E\u0442 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u043A\u0430\u0440\u0442\u0435\u043B\u0435\u0439 \u0438 \u043F\u0440\u0435\u0441\u0442\u0443\u043F\u043B\u0435\u043D\u0438\u0439 \u043F\u0440\u043E\u0442\u0438\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0430 \u0434\u043E \u043E\u0442\u0441\u043B\u0435\u0436\u0438\u0432\u0430\u043D\u0438\u044F \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u044F \u0445\u0438\u043C\u0438\u0447\u0435\u0441\u043A\u043E\u0433\u043E \u043E\u0440\u0443\u0436\u0438\u044F \u0438 \u043A\u043E\u043D\u0444\u043B\u0438\u043A\u0442\u043E\u0432 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u2014 \u0441\u043E\u0447\u0435\u0442\u0430\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0435 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438, \u043A\u0440\u0438\u043C\u0438\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u0438\u0437\u044B, \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0443, \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F \u0438 \u0431\u043E\u0440\u044C\u0431\u0443 \u0437\u0430 \u043F\u0440\u043E\u0437\u0440\u0430\u0447\u043D\u043E\u0441\u0442\u044C \u0438 \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0441\u0442\u044C.' # noqa
description = 'Bellingcat \u2014 \u044D\u0442\u043E \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u0430\u044F \u043C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0430\u044F \u043A\u043E\u043C\u0430\u043D\u0434\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u0435\u0439 \u0438 \u0433\u0440\u0430\u0436\u0434\u0430\u043D\u0441\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u044E\u0442 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u0435 \u0438\u0441\u0442\u043E\u0447\u043D\u0438\u043A\u0438 \u0438 \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0441\u0435\u0442\u0438 \u0434\u043B\u044F \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0439 \u043D\u0430 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u2014 \u043E\u0442 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u043A\u0430\u0440\u0442\u0435\u043B\u0435\u0439 \u0438 \u043F\u0440\u0435\u0441\u0442\u0443\u043F\u043B\u0435\u043D\u0438\u0439 \u043F\u0440\u043E\u0442\u0438\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0430 \u0434\u043E \u043E\u0442\u0441\u043B\u0435\u0436\u0438\u0432\u0430\u043D\u0438\u044F \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u044F \u0445\u0438\u043C\u0438\u0447\u0435\u0441\u043A\u043E\u0433\u043E \u043E\u0440\u0443\u0436\u0438\u044F \u0438 \u043A\u043E\u043D\u0444\u043B\u0438\u043A\u0442\u043E\u0432 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u2014 \u0441\u043E\u0447\u0435\u0442\u0430\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0435 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438, \u043A\u0440\u0438\u043C\u0438\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u0438\u0437\u044B, \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0443, \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F \u0438 \u0431\u043E\u0440\u044C\u0431\u0443 \u0437\u0430 \u043F\u0440\u043E\u0437\u0440\u0430\u0447\u043D\u043E\u0441\u0442\u044C \u0438 \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0441\u0442\u044C.' # noqa: E501
publisher = 'Stichting Bellingcat'
category = 'blog'
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'

View File

@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Bellingcat(BasicNewsRecipe):
title = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 (bell\u00BFngcat)'
__author__ = 'bugmen00t'
description = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 \u2014 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0438\u0439 \u043C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0438\u0439 \u043A\u043E\u043B\u0435\u043A\u0442\u0438\u0432 \u0434\u043E\u0441\u043B\u0456\u0434\u043D\u0438\u043A\u0456\u0432, \u0441\u043B\u0456\u0434\u0447\u0438\u0445 \u0456 \u0433\u0440\u043E\u043C\u0430\u0434\u044F\u043D\u0441\u044C\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0456\u0432, \u044F\u043A\u0456 \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u043E\u0432\u0443\u044E\u0442\u044C \u0432\u0456\u0434\u043A\u0440\u0438\u0442\u0456 \u0434\u0436\u0435\u0440\u0435\u043B\u0430 \u0442\u0430 \u0441\u043E\u0446\u0456\u0430\u043B\u044C\u043D\u0456 \u043C\u0435\u0440\u0435\u0436\u0456 \u0434\u043B\u044F \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F \u0440\u0456\u0437\u043D\u043E\u043C\u0430\u043D\u0456\u0442\u043D\u0438\u0445 \u0442\u0435\u043C \u2014 \u0432\u0456\u0434 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u044C\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u0431\u0430\u0440\u043E\u043D\u0456\u0432 \u0456 \u0437\u043B\u043E\u0447\u0438\u043D\u0456\u0432 \u043F\u0440\u043E\u0442\u0438 \u043B\u044E\u0434\u0441\u0442\u0432\u0430, \u0434\u043E \u0432\u0456\u0434\u0441\u0442\u0435\u0436\u0435\u043D\u043D\u044F \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u0430\u043D\u043D\u044F \u0445\u0456\u043C\u0456\u0447\u043D\u043E\u0457 \u0437\u0431\u0440\u043E\u0457 \u0442\u0430 \u043A\u043E\u043D\u0444\u043B\u0456\u043A\u0442\u0456\u0432 \u0443 \u0432\u0441\u044C\u043E\u043C\u0443 \u0441\u0432\u0456\u0442\u0456. \u041C\u0438 \u043F\u0440\u0430\u0446\u044E\u0454\u043C\u043E \u0432 \u0443\u043D\u0456\u043A\u0430\u043B\u044C\u043D\u0456\u0439 \u0441\u0444\u0435\u0440\u0456, \u0434\u0435 \u043F\u0435\u0440\u0435\u0434\u043E\u0432\u0456 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0441\u0443\u0434\u043E\u0432\u043E-\u043C\u0435\u0434\u0438\u0447\u043D\u0456 \u0434\u043E\u0441\u043B\u0456\u0434\u0436\u0435\u043D\u043D\u044F, \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0438\u043A\u0430, \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F, \u043F\u0440\u043E\u0437\u043E\u0440\u0456\u0441\u0442\u044C \u0456 \u0432\u0456\u0434\u043F\u043E\u0432\u0456\u0434\u0430\u043B\u044C\u043D\u0456\u0441\u0442\u044C \u043E\u0431\u2019\u0454\u0434\u043D\u0443\u044E\u0442\u044C\u0441\u044F.' # noqa
description = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 \u2014 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0438\u0439 \u043C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0438\u0439 \u043A\u043E\u043B\u0435\u043A\u0442\u0438\u0432 \u0434\u043E\u0441\u043B\u0456\u0434\u043D\u0438\u043A\u0456\u0432, \u0441\u043B\u0456\u0434\u0447\u0438\u0445 \u0456 \u0433\u0440\u043E\u043C\u0430\u0434\u044F\u043D\u0441\u044C\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0456\u0432, \u044F\u043A\u0456 \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u043E\u0432\u0443\u044E\u0442\u044C \u0432\u0456\u0434\u043A\u0440\u0438\u0442\u0456 \u0434\u0436\u0435\u0440\u0435\u043B\u0430 \u0442\u0430 \u0441\u043E\u0446\u0456\u0430\u043B\u044C\u043D\u0456 \u043C\u0435\u0440\u0435\u0436\u0456 \u0434\u043B\u044F \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F \u0440\u0456\u0437\u043D\u043E\u043C\u0430\u043D\u0456\u0442\u043D\u0438\u0445 \u0442\u0435\u043C \u2014 \u0432\u0456\u0434 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u044C\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u0431\u0430\u0440\u043E\u043D\u0456\u0432 \u0456 \u0437\u043B\u043E\u0447\u0438\u043D\u0456\u0432 \u043F\u0440\u043E\u0442\u0438 \u043B\u044E\u0434\u0441\u0442\u0432\u0430, \u0434\u043E \u0432\u0456\u0434\u0441\u0442\u0435\u0436\u0435\u043D\u043D\u044F \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u0430\u043D\u043D\u044F \u0445\u0456\u043C\u0456\u0447\u043D\u043E\u0457 \u0437\u0431\u0440\u043E\u0457 \u0442\u0430 \u043A\u043E\u043D\u0444\u043B\u0456\u043A\u0442\u0456\u0432 \u0443 \u0432\u0441\u044C\u043E\u043C\u0443 \u0441\u0432\u0456\u0442\u0456. \u041C\u0438 \u043F\u0440\u0430\u0446\u044E\u0454\u043C\u043E \u0432 \u0443\u043D\u0456\u043A\u0430\u043B\u044C\u043D\u0456\u0439 \u0441\u0444\u0435\u0440\u0456, \u0434\u0435 \u043F\u0435\u0440\u0435\u0434\u043E\u0432\u0456 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0441\u0443\u0434\u043E\u0432\u043E-\u043C\u0435\u0434\u0438\u0447\u043D\u0456 \u0434\u043E\u0441\u043B\u0456\u0434\u0436\u0435\u043D\u043D\u044F, \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0438\u043A\u0430, \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F, \u043F\u0440\u043E\u0437\u043E\u0440\u0456\u0441\u0442\u044C \u0456 \u0432\u0456\u0434\u043F\u043E\u0432\u0456\u0434\u0430\u043B\u044C\u043D\u0456\u0441\u0442\u044C \u043E\u0431\u2019\u0454\u0434\u043D\u0443\u044E\u0442\u044C\u0441\u044F.' # noqa: E501
publisher = 'Stichting Bellingcat'
category = 'blog'
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'

View File

@ -17,13 +17,13 @@ class BenchmarkPl(BasicNewsRecipe):
extra_css = 'ul {list-style-type: none;}'
no_stylesheets = True
use_embedded_content = False
preprocess_regexps = [(re.compile(u'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', # noqa
re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(u'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa
preprocess_regexps = [(re.compile(u'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', # noqa: E501, RUF039
re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(u'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa: RUF039
keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(
name='div', attrs={'class': ['m_zwykly', 'gallery']}), dict(id='article')]
remove_tags_after = dict(id='article')
remove_tags = [dict(name='div', attrs={'class': ['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs={ 'background': 'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width': '210', 'cellspacing': '1', 'cellpadding': '4', 'border': '0', 'align': 'right'})] # noqa
remove_tags = [dict(name='div', attrs={'class': ['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs={'background': 'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width': '210', 'cellspacing': '1', 'cellpadding': '4', 'border': '0', 'align': 'right'})] # noqa: E501
INDEX = 'http://www.benchmark.pl'
feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),

View File

@ -25,7 +25,6 @@ class bergfreunde_blog(BasicNewsRecipe):
__author__ = 'VoHe'
no_stylesheets = True
remove_javascript = True
remove_javascript = True
remove_empty_feeds = True
ignore_duplicate_articles = {'title', 'url'}

View File

@ -63,12 +63,12 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
dict(
attrs={'class': ['socialbar', 'social-sharing flank', 'vel', 'back']}),
dict(name='img', attrs={'alt': 'logo'}),
dict(name='div', attrs={'class': re.compile('infoEl')}),
dict(name='span', attrs={'class': re.compile('loupe')})
dict(name='div', attrs={'class': re.compile(r'infoEl')}),
dict(name='span', attrs={'class': re.compile(r'loupe')})
]
remove_tags_after = [
dict(name='div', attrs={'itemprop': re.compile('articleBody')})
dict(name='div', attrs={'itemprop': re.compile(r'articleBody')})
]
def preprocess_html(self, soup):

View File

@ -30,7 +30,7 @@ class BillOReilly(BasicNewsRecipe):
feeds.append(("O'Reilly Factor", articles_shows))
if articles_columns:
feeds.append(("Newspaper Column", articles_columns))
feeds.append(('Newspaper Column', articles_columns))
return feeds
@ -49,8 +49,7 @@ class BillOReilly(BasicNewsRecipe):
continue
if url.startswith('/'):
url = 'http://www.billoreilly.com' + url + \
'&dest=/pg/jsp/community/tvshowprint.jsp'
url = 'http://www.billoreilly.com' + url + '&dest=/pg/jsp/community/tvshowprint.jsp'
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)

View File

@ -57,8 +57,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
def get_cover_url(self):
soup = self.index_to_soup('http://www.birminghammail.co.uk')
cov = soup.find(attrs={'src': re.compile(
'http://images.icnetwork.co.uk/upl/birm')})
cov = soup.find(attrs={'src': re.compile(r'http://images.icnetwork.co.uk/upl/birm')})
cov = str(cov)
cov2 = re.findall(
r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)

View File

@ -27,8 +27,8 @@ class bleskRecipe(BasicNewsRecipe):
cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
remove_javascript = True
no_stylesheets = True
extra_css = """
"""
extra_css = '''
'''
remove_attributes = []
remove_tags_before = dict(name='div', attrs={'id': ['boxContent']})

View File

@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Blic(BasicNewsRecipe):
title = 'Blic'
__author__ = 'Darko Miletic'
description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' # noqa
description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' # noqa: E501
publisher = 'RINGIER d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
@ -23,7 +23,7 @@ class Blic(BasicNewsRecipe):
masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png'
language = 'sr'
publication_type = 'newspaper'
extra_css = """
extra_css = '''
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Georgia, serif1, serif}
@ -35,13 +35,13 @@ class Blic(BasicNewsRecipe):
.potpis{font-size: x-small; color: gray}
.article_info{font-size: small}
img{margin-bottom: 0.8em; margin-top: 0.8em; display: block}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] # noqa: RUF039
remove_tags_before = dict(name='div', attrs={'id': 'article_info'})
remove_tags = [
dict(name=['object', 'link', 'meta', 'base', 'object', 'embed'])]

View File

@ -56,7 +56,7 @@ class Bloomberg(BasicNewsRecipe):
masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg'
description = (
'Bloomberg Businessweek helps global leaders stay ahead with insights and in-depth analysis on the people,'
' companies, events, and trends shaping today\'s complex, global economy.'
" companies, events, and trends shaping today's complex, global economy."
)
remove_empty_feeds = True
@ -124,8 +124,8 @@ class Bloomberg(BasicNewsRecipe):
cat = '<div class="cat">' + data['primaryCategory'] + '</div>'
if 'abstract' in data and data['abstract'] and data['abstract'] is not None:
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join([x for x in data['abstract']]) + '</li></ul></div>'
elif 'summary' in data and data['summary']:
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join(list(data['abstract'])) + '</li></ul></div>'
elif data.get('summary'):
subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>'
if 'byline' in data and data['byline'] is not None:

View File

@ -134,8 +134,8 @@ class Bloomberg(BasicNewsRecipe):
cat = '<div class="cat">' + data['primaryCategory'] + '</div>'
if 'abstract' in data and data['abstract'] and data['abstract'] is not None:
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join([x for x in data['abstract']]) + '</li></ul></div>'
elif 'summary' in data and data['summary']:
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join(list(data['abstract'])) + '</li></ul></div>'
elif data.get('summary'):
subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>'
if 'byline' in data and data['byline'] is not None:

View File

@ -2,29 +2,29 @@ from urllib.parse import urljoin
from calibre.web.feeds.news import BasicNewsRecipe
_issue_url = ""
_issue_url = ''
class BookforumMagazine(BasicNewsRecipe):
title = "Bookforum"
title = 'Bookforum'
description = (
"Bookforum is an American book review magazine devoted to books and "
"the discussion of literature. https://www.bookforum.com/print"
'Bookforum is an American book review magazine devoted to books and '
'the discussion of literature. https://www.bookforum.com/print'
)
language = "en"
__author__ = "ping"
publication_type = "magazine"
encoding = "utf-8"
language = 'en'
__author__ = 'ping'
publication_type = 'magazine'
encoding = 'utf-8'
remove_javascript = True
no_stylesheets = True
auto_cleanup = False
compress_news_images = True
compress_news_images_auto_size = 8
keep_only_tags = [dict(class_="blog-article")]
remove_tags = [dict(name=["af-share-toggle", "af-related-articles"])]
keep_only_tags = [dict(class_='blog-article')]
remove_tags = [dict(name=['af-share-toggle', 'af-related-articles'])]
extra_css = """
extra_css = '''
.blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; }
.blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
.blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; }
@ -33,46 +33,46 @@ class BookforumMagazine(BasicNewsRecipe):
display: block; max-width: 100%; height: auto;
}
.blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; }
"""
'''
def preprocess_html(self, soup):
# strip away links that's not needed
for ele in soup.select(".blog-article__header a"):
for ele in soup.select('.blog-article__header a'):
ele.unwrap()
return soup
def parse_index(self):
soup = self.index_to_soup(
_issue_url if _issue_url else "https://www.bookforum.com/print"
_issue_url if _issue_url else 'https://www.bookforum.com/print'
)
meta_ele = soup.find("meta", property="og:title")
meta_ele = soup.find('meta', property='og:title')
if meta_ele:
self.timefmt = f' [{meta_ele["content"]}]'
cover_ele = soup.find("img", class_="toc-issue__cover")
cover_ele = soup.find('img', class_='toc-issue__cover')
if cover_ele:
self.cover_url = urljoin(
"https://www.bookforum.com",
soup.find("img", class_="toc-issue__cover")["src"],
'https://www.bookforum.com',
soup.find('img', class_='toc-issue__cover')['src'],
)
articles = {}
for sect_ele in soup.find_all("div", class_="toc-articles__section"):
for sect_ele in soup.find_all('div', class_='toc-articles__section'):
section_name = self.tag_to_string(
sect_ele.find("a", class_="toc__anchor-links__link")
sect_ele.find('a', class_='toc__anchor-links__link')
)
for article_ele in sect_ele.find_all("article"):
title_ele = article_ele.find("h1")
sub_title_ele = article_ele.find(class_="toc-article__subtitle")
for article_ele in sect_ele.find_all('article'):
title_ele = article_ele.find('h1')
sub_title_ele = article_ele.find(class_='toc-article__subtitle')
articles.setdefault(section_name, []).append(
{
"title": self.tag_to_string(title_ele),
"url": article_ele.find("a", class_="toc-article__link")[
"href"
'title': self.tag_to_string(title_ele),
'url': article_ele.find('a', class_='toc-article__link')[
'href'
],
"description": self.tag_to_string(sub_title_ele)
'description': self.tag_to_string(sub_title_ele)
if sub_title_ele
else "",
else '',
}
)
return articles.items()

View File

@ -22,9 +22,9 @@ class Borsen_dk(BasicNewsRecipe):
language = 'da'
keep_only_tags = [
dict(name="h1", attrs={'itemprop': 'headline'}),
dict(name="div", attrs={'itemprob': 'datePublished'}),
dict(name="div", attrs={'itemprop': 'articleBody'}),
dict(name='h1', attrs={'itemprop': 'headline'}),
dict(name='div', attrs={'itemprob': 'datePublished'}),
dict(name='div', attrs={'itemprop': 'articleBody'}),
]
# Feed are found here:

View File

@ -42,24 +42,24 @@ def class_startswith(*prefixes):
# From: https://www3.bostonglobe.com/lifestyle/comics?arc404=true
comics_to_fetch = {
"ADAM@HOME": 'ad',
"ARLO & JANIS": 'aj',
'ADAM@HOME': 'ad',
'ARLO & JANIS': 'aj',
# "CUL DE SAC": 'cds',
# "CURTIS": 'kfcrt',
"DILBERT": 'dt',
"DOONESBURY": 'db',
"DUSTIN": 'kfdus',
"F MINUS": 'fm',
"FOR BETTER OR WORSE": 'fb',
'DILBERT': 'dt',
'DOONESBURY': 'db',
'DUSTIN': 'kfdus',
'F MINUS': 'fm',
'FOR BETTER OR WORSE': 'fb',
# "GET FUZZY": 'gz',
# "MOTHER GOOSE & GRIMM": 'tmmgg',
# "JUMPSTART": 'jt',
"MONTY": 'mt',
'MONTY': 'mt',
# "POOCH CAFE",
"RHYMES WITH ORANGE": 'kfrwo',
'RHYMES WITH ORANGE': 'kfrwo',
# "ROSE IS ROSE": 'rr',
# "ZIPPY THE PINHEAD": 'kfzpy',
"ZITS": 'kfzt'
'ZITS': 'kfzt'
}
@ -77,10 +77,10 @@ def extract_json(raw_html):
def absolutize_url(url):
if url.startswith("//"):
return "https:" + url
if url.startswith('//'):
return 'https:' + url
if url.startswith('/'):
url = "https://www.bostonglobe.com" + url
url = 'https://www.bostonglobe.com' + url
return url
@ -120,7 +120,7 @@ def main():
class BostonGlobeSubscription(BasicNewsRecipe):
title = "Boston Globe"
title = 'Boston Globe'
__author__ = 'Kovid Goyal'
description = 'The Boston Globe'
language = 'en_US'

View File

@ -12,6 +12,7 @@ def class_as_string(x):
x = ' '.join(x)
return x
def class_startswith(*prefixes):
def q(x):
@ -24,18 +25,19 @@ def class_startswith(*prefixes):
return dict(attrs={'class': q})
def absolutize_url(url):
if url.startswith("//"):
return "https:" + url
if url.startswith('//'):
return 'https:' + url
if url.startswith('/'):
url = "https://www.bostonglobe.com" + url
url = 'https://www.bostonglobe.com' + url
return url
class BostonGlobePrint(BasicNewsRecipe):
title = "Boston Globe | Print Edition"
title = 'Boston Globe | Print Edition'
__author__ = 'Kovid Goyal, unkn0wn'
description = 'The Boston Globe - Today\'s Paper'
description = "The Boston Globe - Today's Paper"
language = 'en_US'
keep_only_tags = [
@ -70,7 +72,7 @@ class BostonGlobePrint(BasicNewsRecipe):
for image in soup.findAll('img', src=True):
if image['src'].endswith('750.jpg'):
return 'https:' + image['src']
self.log("\nCover unavailable")
self.log('\nCover unavailable')
cover = None
return cover
@ -94,8 +96,8 @@ class BostonGlobePrint(BasicNewsRecipe):
desc = self.tag_to_string(d)
self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url)
feeds_dict[section].append({"title": title, "url": url, "description": desc})
return [(section, articles) for section, articles in feeds_dict.items()]
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
return list(feeds_dict.items())
def preprocess_raw_html(self, raw_html, url):
soup = self.index_to_soup(raw_html)

View File

@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1467715002(BasicNewsRecipe):
title = 'Breaking Mad'
__author__ = 'bugmen00t'
description = 'Breaking Mad - пока смерть не насмешит нас. Breakingmad - это агрегатор самых странных и драматичных новостей с жизнеутверждающим эффектом. Breakingmad - это все, что вы должны знать о России и не только.' # noqa
description = 'Breaking Mad - пока смерть не насмешит нас. Breakingmad - это агрегатор самых странных и драматичных новостей с жизнеутверждающим эффектом. Breakingmad - это все, что вы должны знать о России и не только.' # noqa: E501
publisher = 'BreakingMad'
category = 'news'
cover_url = u'http://breakingmad.me/images/logo.png'

View File

@ -5,7 +5,6 @@ from __future__ import print_function
__license__ = 'GPL v3'
import datetime
import re
from calibre.web.feeds.news import BasicNewsRecipe
@ -23,40 +22,39 @@ class brewiarz(BasicNewsRecipe):
next_days = 1
def parse_index(self):
dec2rom_dict = {"01": "i", "02": "ii", "03": "iii", "04": "iv",
"05": "v", "06": "vi", "07": "vii", "08": "viii",
"09": "ix", "10": "x", "11": "xi", "12": "xii"}
dec2rom_dict = {'01': 'i', '02': 'ii', '03': 'iii', '04': 'iv',
'05': 'v', '06': 'vi', '07': 'vii', '08': 'viii',
'09': 'ix', '10': 'x', '11': 'xi', '12': 'xii'}
weekday_dict = {"Sunday": "Niedziela", "Monday": "Poniedziałek", "Tuesday": "Wtorek",
"Wednesday": "Środa", "Thursday": "Czwartek", "Friday": "Piątek", "Saturday": "Sobota"}
weekday_dict = {'Sunday': 'Niedziela', 'Monday': 'Poniedziałek', 'Tuesday': 'Wtorek',
'Wednesday': 'Środa', 'Thursday': 'Czwartek', 'Friday': 'Piątek', 'Saturday': 'Sobota'}
now = datetime.datetime.now()
feeds = []
for i in range(0, self.next_days):
for i in range(self.next_days):
url_date = now + datetime.timedelta(days=i)
url_date_month = url_date.strftime("%m")
url_date_month = url_date.strftime('%m')
url_date_month_roman = dec2rom_dict[url_date_month]
url_date_day = url_date.strftime("%d")
url_date_year = url_date.strftime("%Y")[2:]
url_date_weekday = url_date.strftime("%A")
url_date_day = url_date.strftime('%d')
url_date_year = url_date.strftime('%Y')[2:]
url_date_weekday = url_date.strftime('%A')
url_date_weekday_pl = weekday_dict[url_date_weekday]
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + \
url_date_year + "/" + url_date_day + url_date_month + "/index.php3"
url = ('http://brewiarz.pl/' + url_date_month_roman + '_' +
url_date_year + '/' + url_date_day + url_date_month + '/index.php3')
articles = self.parse_pages(url)
if articles:
title = url_date_weekday_pl + " " + url_date_day + \
"." + url_date_month + "." + url_date_year
title = (url_date_weekday_pl + ' ' + url_date_day +
'.' + url_date_month + '.' + url_date_year)
feeds.append((title, articles))
else:
sectors = self.get_sectors(url)
for subpage in sectors:
title = url_date_weekday_pl + " " + url_date_day + "." + \
url_date_month + "." + url_date_year + " - " + subpage.string
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + \
"/" + url_date_day + url_date_month + \
"/" + subpage['href']
title = (url_date_weekday_pl + ' ' + url_date_day + '.' +
url_date_month + '.' + url_date_year + ' - ' + subpage.string)
url = ('http://brewiarz.pl/' + url_date_month_roman + '_' + url_date_year +
'/' + url_date_day + url_date_month + '/' + subpage['href'])
print(url)
articles = self.parse_pages(url)
if articles:
@ -91,9 +89,8 @@ class brewiarz(BasicNewsRecipe):
sublinks = ol.findAll(name='a')
for sublink in sublinks:
link_title = self.tag_to_string(
link) + " - " + self.tag_to_string(sublink)
link_url_print = re.sub(
'php3', 'php3?kr=_druk&wr=lg&', sublink['href'])
link) + ' - ' + self.tag_to_string(sublink)
link_url_print = sublink['href'].replace('php3', 'php3?kr=_druk&wr=lg&')
link_url = url[:-10] + link_url_print
current_articles.append({'title': link_title,
'url': link_url, 'description': '', 'date': ''})
@ -102,8 +99,7 @@ class brewiarz(BasicNewsRecipe):
continue
else:
link_title = self.tag_to_string(link)
link_url_print = re.sub(
'php3', 'php3?kr=_druk&wr=lg&', link['href'])
link_url_print = link['href'].replace('php3', 'php3?kr=_druk&wr=lg&')
link_url = url[:-10] + link_url_print
current_articles.append({'title': link_title,
'url': link_url, 'description': '', 'date': ''})
@ -145,7 +141,7 @@ class brewiarz(BasicNewsRecipe):
if x == tag:
break
else:
print("Can't find", tag, "in", tag.parent)
print("Can't find", tag, 'in', tag.parent)
continue
for r in reversed(tag.contents):
tag.parent.insert(i, r)
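Several hunks above swap re.sub() for str.replace(). When both the pattern and the replacement are fixed strings the two are interchangeable, and the plain string method skips the regex machinery; a small check with an illustrative file name:

    import re

    link = 'czytelnia.php3'
    a = re.sub('php3', 'php3?kr=_druk&wr=lg&', link)
    b = link.replace('php3', 'php3?kr=_druk&wr=lg&')
    assert a == b == 'czytelnia.php3?kr=_druk&wr=lg&'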

View File

@ -16,7 +16,7 @@ class AdvancedUserRecipe(BasicNewsRecipe):
cover_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
masthead_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
remove_tags = [dict(attrs={'class': ['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}), # noqa
remove_tags = [dict(attrs={'class': ['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}), # noqa: E501
dict(id=['header', 'artTools', 'context', 'interact',
'footer-navigation', 'bwNet', 'copy', 'keyboardNavigationHint']),
dict(name=['hjtrs', 'kud'])]

View File

@ -22,10 +22,10 @@ class Business_insider(BasicNewsRecipe):
remove_empty_feeds = True
publication_type = 'newsportal'
masthead_url = 'http://static.businessinsider.com/assets/images/logos/tbi_print.jpg'
extra_css = """
extra_css = '''
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@ -15,8 +15,6 @@ class BusinessStandardPrint(BasicNewsRecipe):
language = 'en_IN'
masthead_url = 'https://bsmedia.business-standard.com/include/_mod/site/html5/images/business-standard-logo.png'
encoding = 'utf-8'
resolve_internal_links = True
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
@ -64,7 +62,7 @@ class BusinessStandardPrint(BasicNewsRecipe):
if dt.weekday() == 6:
self.log.warn(
'Business Standard Does Not Have A Print Publication On Sunday. The Reports'
' And Columns On This Page Today Appeared In The Newspaper\'s Saturday Edition.'
" And Columns On This Page Today Appeared In The Newspaper's Saturday Edition."
)
url = 'https://apibs.business-standard.com/category/today-paper?sortBy=' + today
raw = self.index_to_soup(url, raw=True)

View File

@ -90,7 +90,7 @@ class BT(BasicNewsRecipe):
# Insert feeds in specified order, if available
feedSort = ['Editor\'s Note', 'Editors note']
feedSort = ["Editor's Note", 'Editors note']
for i in feedSort:
if i in sections:
feeds.append((i, sections[i]))
@ -98,8 +98,7 @@ class BT(BasicNewsRecipe):
# Done with the sorted feeds
for i in feedSort:
if i in sections:
del sections[i]
sections.pop(i, None)
# Append what is left over...
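The feed-sorting change just above folds an 'if key in sections: del sections[key]' pair into sections.pop(key, None), which removes the key when present and silently does nothing otherwise. Tiny sketch with made-up data:

    sections = {'Editors note': []}
    for i in ("Editor's Note", 'Editors note'):
        sections.pop(i, None)   # no KeyError even though the first key is absent
    assert sections == {}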

View File

@ -5,8 +5,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
class CACM(BasicNewsRecipe):
title = "ACM CACM Magazine"
description = "Published on day 1 of every month."
title = 'ACM CACM Magazine'
description = 'Published on day 1 of every month.'
language = 'en'
oldest_article = 30
max_articles_per_feed = 100
@ -17,16 +17,16 @@ class CACM(BasicNewsRecipe):
]
def get_cover_url(self):
"""
'''
Parse out cover URL from cover page.
Example:
From: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.large.jpg?1647524668&1647524668
Get: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.jpg
"""
'''
soup = self.index_to_soup("https://cacm.acm.org/")
a_img = soup.find("a", class_="menuCover")
img_url = a_img.img["src"]
img_url = img_url.split("?")[0]
img_url = img_url.replace(".large", "")
soup = self.index_to_soup('https://cacm.acm.org/')
a_img = soup.find('a', class_='menuCover')
img_url = a_img.img['src']
img_url = img_url.split('?')[0]
img_url = img_url.replace('.large', '')
return img_url

View File

@ -9,8 +9,7 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
title = u'Calcalist'
language = 'he'
__author__ = 'marbs'
extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }' # noqa
simultaneous_downloads = 5
extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }' # noqa: E501
remove_javascript = True
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1
@ -23,34 +22,33 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
dict(name='div', attrs={'class': 'ArticleBodyComponent'}),
]
remove_tags = [dict(name='p', attrs={'text': ['&nbsp;']})]
max_articles_per_feed = 100
preprocess_regexps = [
(re.compile(r'<p>&nbsp;</p>', re.DOTALL | re.IGNORECASE), lambda match: '')
]
feeds = [
(u" דף הבית", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml"),
(u" 24/7", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml"),
(u" באזז", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml"),
(u" משפט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml"),
(u" רכב", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml"),
(u" אחריות וסביבה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml"),
(u" דעות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml"),
(u" תיירות ותעופה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml"),
(u" קריירה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml"),
(u" אחד העם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml"),
(u" המלצות ואזהרות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml"),
(u" הייטק והון סיכון", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml"),
(u" חדשות טכנולוגיה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml"),
(u" תקשורת", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml"),
(u" אינטרנט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml"),
(u" מכשירים וגאדג'טים", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml"),
(u" המדריך", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml"),
(u" אפליקציות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml"),
(u" Play", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml"),
(u" הכסף", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml"),
(u" עולם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml"),
(u" פרסום ושיווק", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml"),
(u" פנאי", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml"),
(u" עסקי ספורט", u"http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml")
(u' דף הבית', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml'),
(u' 24/7', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml'),
(u' באזז', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml'),
(u' משפט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml'),
(u' רכב', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml'),
(u' אחריות וסביבה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml'),
(u' דעות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml'),
(u' תיירות ותעופה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml'),
(u' קריירה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml'),
(u' אחד העם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml'),
(u' המלצות ואזהרות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml'),
(u' הייטק והון סיכון', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml'),
(u' חדשות טכנולוגיה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml'),
(u' תקשורת', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml'),
(u' אינטרנט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml'),
(u" מכשירים וגאדג'טים", u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml'),
(u' המדריך', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml'),
(u' אפליקציות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml'),
(u' Play', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml'),
(u' הכסף', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml'),
(u' עולם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml'),
(u' פרסום ושיווק', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml'),
(u' פנאי', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml'),
(u' עסקי ספורט', u'http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml')
]

View File

@ -60,20 +60,20 @@ class CanWestPaper(BasicNewsRecipe):
]
# un-comment the following six lines for the Vancouver Province
# title = u'Vancouver Province'
# url_prefix = 'http://www.theprovince.com'
# description = u'News from Vancouver, BC'
# std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
# logo_url = 'vplogo.jpg'
# fp_tag = 'CAN_TP'
# # title = u'Vancouver Province'
# # url_prefix = 'http://www.theprovince.com'
# # description = u'News from Vancouver, BC'
# # std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
# # logo_url = 'vplogo.jpg'
# # fp_tag = 'CAN_TP'
# un-comment the following six lines for the Vancouver Sun
# title = u'Vancouver Sun'
# url_prefix = 'http://www.vancouversun.com'
# description = u'News from Vancouver, BC'
# std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
# logo_url = 'vslogo.jpg'
# fp_tag = 'CAN_VS'
# # title = u'Vancouver Sun'
# # url_prefix = 'http://www.vancouversun.com'
# # description = u'News from Vancouver, BC'
# # std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
# # logo_url = 'vslogo.jpg'
# # fp_tag = 'CAN_VS'
# un-comment the following six lines for the Calgary Herald
title = u'Calgary Herald'
@ -90,7 +90,7 @@ class CanWestPaper(BasicNewsRecipe):
# # std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
# # logo_url = 'ejlogo.jpg'
# # fp_tag = 'CAN_EJ'
#
# un-comment the following six lines for the Ottawa Citizen
# # title = u'Ottawa Citizen'
# # url_prefix = 'http://www.ottawacitizen.com'
@ -98,7 +98,7 @@ class CanWestPaper(BasicNewsRecipe):
# # std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
# # logo_url = 'oclogo.jpg'
# # fp_tag = 'CAN_OC'
#
# un-comment the following six lines for the Montreal Gazette
# # title = u'Montreal Gazette'
# # url_prefix = 'http://www.montrealgazette.com'
@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
dict(name='div', attrs={'id': re.compile('flyer')}),
dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):
@ -154,8 +154,7 @@ class CanWestPaper(BasicNewsRecipe):
except:
while daysback < 7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg' + \
str((date.today() - timedelta(days=daysback)).day) + \
'/lg/' + self.fp_tag + '.jpg'
str((date.today() - timedelta(days=daysback)).day) + '/lg/' + self.fp_tag + '.jpg'
br = BasicNewsRecipe.get_browser(self)
try:
br.open(cover)
@ -164,24 +163,24 @@ class CanWestPaper(BasicNewsRecipe):
continue
break
if daysback == 7:
self.log("\nCover unavailable")
self.log('\nCover unavailable')
cover = None
return cover
def fixChars(self, string):
# Replace lsquo (\x91)
fixed = re.sub("\x91", "", string)
fixed = string.replace('\x91', '')
# Replace rsquo (\x92)
fixed = re.sub("\x92", "", fixed)
fixed = fixed.replace('\x92', '')
# Replace ldquo (\x93)
fixed = re.sub("\x93", "", fixed)
fixed = fixed.replace('\x93', '')
# Replace rdquo (\x94)
fixed = re.sub("\x94", "", fixed)
fixed = fixed.replace('\x94', '')
# Replace ndash (\x96)
fixed = re.sub("\x96", "", fixed)
fixed = fixed.replace('\x96', '')
# Replace mdash (\x97)
fixed = re.sub("\x97", "", fixed)
fixed = re.sub("&#x2019;", "", fixed)
fixed = fixed.replace('\x97', '')
fixed = fixed.replace('&#x2019;', '')
return fixed
def massageNCXText(self, description):
@ -214,7 +213,7 @@ class CanWestPaper(BasicNewsRecipe):
divtags = soup.findAll('div', attrs={'id': ''})
if divtags:
for div in divtags:
del(div['id'])
del div['id']
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps
@ -262,10 +261,10 @@ class CanWestPaper(BasicNewsRecipe):
if url.startswith('/'):
url = self.url_prefix + url
if not url.startswith(self.url_prefix):
print("Rejected " + url)
print('Rejected ' + url)
return
if url in self.url_list:
print("Rejected dup " + url)
print('Rejected dup ' + url)
return
self.url_list.append(url)
title = self.tag_to_string(atag, False)
@ -277,8 +276,8 @@ class CanWestPaper(BasicNewsRecipe):
return
dtag = adiv.find('div', 'content')
description = ''
print("URL " + url)
print("TITLE " + title)
print('URL ' + url)
print('TITLE ' + title)
if dtag is not None:
stag = dtag.span
if stag is not None:
@ -286,18 +285,18 @@ class CanWestPaper(BasicNewsRecipe):
description = self.tag_to_string(stag, False)
else:
description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description)
print('DESCRIPTION: ' + description)
if key not in articles:
articles[key] = []
articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content=''))
def parse_web_index(key, keyurl):
print("Section: " + key + ': ' + self.url_prefix + keyurl)
print('Section: ' + key + ': ' + self.url_prefix + keyurl)
try:
soup = self.index_to_soup(self.url_prefix + keyurl)
except:
print("Section: " + key + ' NOT FOUND')
print('Section: ' + key + ' NOT FOUND')
return
ans.append(key)
mainsoup = soup.find('div', 'bodywrapper')
@ -309,7 +308,7 @@ class CanWestPaper(BasicNewsRecipe):
for wdiv in mainsoup.findAll(attrs={'class': ['headline', 'featurecontent']}):
handle_article(wdiv, key)
for (k, url) in self.postmedia_index_pages:
for k,url in self.postmedia_index_pages:
parse_web_index(k, url)
ans = [(key, articles[key]) for key in ans if key in articles]
return ans
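fixChars() above still strips the stray Windows-1252 control bytes one replace() at a time, mirroring the old re.sub() chain. An equivalent single-pass alternative, not what the recipe uses and shown only as a sketch, is str.translate with a deletion map:

    _CP1252_STRAYS = dict.fromkeys(map(ord, '\x91\x92\x93\x94\x96\x97'), None)

    def fix_chars(s):
        return s.translate(_CP1252_STRAYS).replace('&#x2019;', '')

    assert fix_chars('a\x91b\x97c&#x2019;d') == 'abcd'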

View File

@ -4,7 +4,6 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1271446252(BasicNewsRecipe):
title = u'CanardPC'
oldest_article = 7
max_articles_per_feed = 100
language = 'fr'
__author__ = 'zorgluf'
max_articles_per_feed = 25

View File

@ -17,7 +17,7 @@ class Capital(BasicNewsRecipe):
keep_only_tags = [
dict(name='h1'),
dict(name='p'),
dict(name='span', attrs={'id': ["textbody"]})
dict(name='span', attrs={'id': ['textbody']})
]
# 3 posts seemed to have utf8 encoding
@ -36,6 +36,6 @@ class Capital(BasicNewsRecipe):
'http://www.capital.gr/articles/articlesrss.asp?catid=4'),
(u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A3\u0399\u03A9\u03A0\u0397\u03A4\u0397\u03A1\u0399\u039F',
'http://www.capital.gr/articles/articlesrss.asp?catid=6'),
(u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A0\u0399\u03A3\u03A9 \u0391\u03A0\u039F \u03A4\u0399\u03A3 \u0393\u03A1\u0391\u039C\u039C\u0395\u03A3', # noqa
(u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A0\u0399\u03A3\u03A9 \u0391\u03A0\u039F \u03A4\u0399\u03A3 \u0393\u03A1\u0391\u039C\u039C\u0395\u03A3', # noqa: E501
'http://www.capital.gr/articles/articlesrss.asp?catid=8'),
]

View File

@ -15,6 +15,7 @@ def absurl(x):
x = 'https://caravanmagazine.in' + x
return x
def safe_dict(data, *names):
ans = data
for x in names:
@ -49,6 +50,7 @@ def parse_body(x):
yield from parse_body(p)
yield '</p>'
def parse_p(p):
if p.get('type', '') == 'text':
if 'marks' in p:
@ -96,7 +98,7 @@ class CaravanMagazine(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self, *args, **kw)
if not self.username or not self.password:
return br
data = json.dumps({"0":{"json":{"email":self.username,"password":self.password}}})
data = json.dumps({'0':{'json':{'email':self.username,'password':self.password}}})
if not isinstance(data, bytes):
data = data.encode('utf-8')
rq = Request(
@ -138,7 +140,7 @@ class CaravanMagazine(BasicNewsRecipe):
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
x = d.split('-')
inp = json.dumps({"0":{"json":{"month":int(x[0]),"year":int(x[1])}}})
inp = json.dumps({'0':{'json':{'month':int(x[0]),'year':int(x[1])}}})
api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='')
raw = json.loads(self.index_to_soup(api, raw=True))
@ -174,7 +176,7 @@ class CaravanMagazine(BasicNewsRecipe):
def print_version(self, url):
slug = urlparse(url).path
inp = json.dumps({"0":{"json":{"slug":slug}}})
inp = json.dumps({'0':{'json':{'slug':slug}}})
return 'https://api.caravanmagazine.in/api/trpc/articles.getFromCache?batch=1&input=' + quote(inp, safe='')
def preprocess_raw_html(self, raw, url):
@ -211,6 +213,6 @@ class CaravanMagazine(BasicNewsRecipe):
for x in art_cont['premiumContent']:
premium_cont += '\n' + ''.join(parse_body(x))
return '<html><body><div>' \
+ cat + title + desc + auth + lede + free_cont + premium_cont + \
'</div></body></html>'
return ('<html><body><div>'
+ cat + title + desc + auth + lede + free_cont + premium_cont +
'</div></body></html>')
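The Caravan hunks above all build tRPC-style query URLs the same way: a small JSON batch payload is serialised and percent-encoded into the input query parameter. A condensed sketch of that pattern (the helper name is invented; the endpoint and field names come from the hunks above, and the sample slug is made up):

    import json
    from urllib.parse import quote

    def trpc_url(procedure, payload):
        inp = json.dumps({'0': {'json': payload}})
        return ('https://api.caravanmagazine.in/api/trpc/' + procedure +
                '?batch=1&input=' + quote(inp, safe=''))

    print(trpc_url('articles.getFromCache', {'slug': '/magazine/example'}))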

Some files were not shown because too many files have changed in this diff