mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'ruff-pep8-strict' of https://github.com/un-pogaz/calibre
This commit is contained in:
commit
7e61ea2248
@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@ -27,7 +26,7 @@ for name, src in sources.items():
|
||||
os.chdir(iconset)
|
||||
try:
|
||||
for sz in (16, 32, 128, 256, 512, 1024):
|
||||
iname = 'icon_{0}x{0}.png'.format(sz)
|
||||
iname = f'icon_{sz}x{sz}.png'
|
||||
iname2x = 'icon_{0}x{0}@2x.png'.format(sz // 2)
|
||||
if src.endswith('.svg'):
|
||||
subprocess.check_call(['rsvg-convert', src, '-w', str(sz), '-h', str(sz), '-o', iname])
|
||||
@ -36,7 +35,7 @@ for name, src in sources.items():
|
||||
if sz == 512:
|
||||
shutil.copy2(src, iname)
|
||||
else:
|
||||
subprocess.check_call(['convert', src, '-resize', '{0}x{0}'.format(sz), iname])
|
||||
subprocess.check_call(['convert', src, '-resize', f'{sz}x{sz}', iname])
|
||||
if sz > 16:
|
||||
shutil.copy2(iname, iname2x)
|
||||
if sz > 512:
|
||||
|
@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@ -24,7 +23,7 @@ for name, src in sources.items():
|
||||
try:
|
||||
names = []
|
||||
for sz in (16, 24, 32, 48, 64, 256):
|
||||
iname = os.path.join('ico_temp', '{0}x{0}.png'.format(sz))
|
||||
iname = os.path.join('ico_temp', f'{sz}x{sz}.png')
|
||||
subprocess.check_call(['rsvg-convert', src, '-w', str(sz), '-h', str(sz), '-o', iname])
|
||||
subprocess.check_call(['optipng', '-o7', '-strip', 'all', iname])
|
||||
if sz >= 128:
|
||||
|
@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
import argparse
|
||||
|
@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
|
||||
@ -27,7 +26,7 @@ def clone_node(node, parent):
|
||||
def merge():
|
||||
base = os.path.dirname(os.path.abspath(__file__))
|
||||
ans = etree.fromstring(
|
||||
'<svg xmlns="%s" xmlns:xlink="%s"/>' % (SVG_NS, XLINK_NS),
|
||||
f'<svg xmlns="{SVG_NS}" xmlns:xlink="{XLINK_NS}"/>',
|
||||
parser=etree.XMLParser(
|
||||
recover=True, no_network=True, resolve_entities=False
|
||||
)
|
||||
@ -43,14 +42,14 @@ def merge():
|
||||
recover=True, no_network=True, resolve_entities=False
|
||||
)
|
||||
)
|
||||
symbol = ans.makeelement('{%s}symbol' % SVG_NS)
|
||||
symbol = ans.makeelement('{%s}symbol' % SVG_NS) # noqa: UP031
|
||||
symbol.set('viewBox', svg.get('viewBox'))
|
||||
symbol.set('id', 'icon-' + f.rpartition('.')[0])
|
||||
for child in svg.iterchildren('*'):
|
||||
clone_node(child, symbol)
|
||||
ans.append(symbol)
|
||||
ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False)
|
||||
ans = re.sub('<svg[^>]+>', '<svg style="display:none">', ans, count=1)
|
||||
ans = re.sub(r'<svg[^>]+>', '<svg style="display:none">', ans, count=1)
|
||||
return ans
|
||||
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
@ -1,5 +1,3 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# calibre documentation build configuration file, created by
|
||||
# sphinx-quickstart.py on Sun Mar 23 01:23:55 2008.
|
||||
#
|
||||
@ -47,11 +45,11 @@ templates_path = ['templates']
|
||||
source_suffix = {'.rst': 'restructuredtext'}
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index' if tags.has('online') else 'simple_index' # noqa
|
||||
master_doc = 'index' if tags.has('online') else 'simple_index' # noqa: F821
|
||||
# kill the warning about index/simple_index not being in a toctree
|
||||
exclude_patterns = ['simple_index.rst'] if master_doc == 'index' else ['index.rst']
|
||||
exclude_patterns.append('cli-options-header.rst')
|
||||
if tags.has('gettext'): # noqa
|
||||
if tags.has('gettext'): # noqa: F821
|
||||
# Do not exclude anything as the strings must be translated. This will
|
||||
# generate a warning about the documents not being in a toctree, just ignore
|
||||
# it.
|
||||
@ -64,7 +62,7 @@ language = os.environ.get('CALIBRE_OVERRIDE_LANG', 'en')
|
||||
def generated_langs():
|
||||
try:
|
||||
return os.listdir(os.path.join(base, 'generated'))
|
||||
except EnvironmentError as e:
|
||||
except OSError as e:
|
||||
if e.errno != errno.ENOENT:
|
||||
raise
|
||||
return ()
|
||||
@ -99,13 +97,13 @@ today_fmt = '%B %d, %Y'
|
||||
unused_docs = ['global', 'cli/global']
|
||||
|
||||
locale_dirs = ['locale/']
|
||||
title = '%s User Manual' % __appname__
|
||||
title = f'{__appname__} User Manual'
|
||||
needs_localization = language not in {'en', 'eng'}
|
||||
if needs_localization:
|
||||
import gettext
|
||||
try:
|
||||
t = gettext.translation('simple_index', locale_dirs[0], [language])
|
||||
except IOError:
|
||||
except OSError:
|
||||
pass
|
||||
else:
|
||||
title = t.gettext(title)
|
||||
@ -176,7 +174,7 @@ def sort_languages(x):
|
||||
lc, name = x
|
||||
if lc == language:
|
||||
return ''
|
||||
return sort_key(type(u'')(name))
|
||||
return sort_key(str(name))
|
||||
|
||||
|
||||
website = 'https://calibre-ebook.com'
|
||||
@ -193,13 +191,13 @@ extlinks = {
|
||||
}
|
||||
del sort_languages, get_language
|
||||
|
||||
epub_author = u'Kovid Goyal'
|
||||
epub_publisher = u'Kovid Goyal'
|
||||
epub_copyright = u'© {} Kovid Goyal'.format(date.today().year)
|
||||
epub_description = u'Comprehensive documentation for calibre'
|
||||
epub_identifier = u'https://manual.calibre-ebook.com'
|
||||
epub_scheme = u'url'
|
||||
epub_uid = u'S54a88f8e9d42455e9c6db000e989225f'
|
||||
epub_author = 'Kovid Goyal'
|
||||
epub_publisher = 'Kovid Goyal'
|
||||
epub_copyright = f'© {date.today().year} Kovid Goyal'
|
||||
epub_description = 'Comprehensive documentation for calibre'
|
||||
epub_identifier = 'https://manual.calibre-ebook.com'
|
||||
epub_scheme = 'url'
|
||||
epub_uid = 'S54a88f8e9d42455e9c6db000e989225f'
|
||||
epub_tocdepth = 4
|
||||
epub_tocdup = True
|
||||
epub_cover = ('epub_cover.jpg', 'epub_cover_template.html')
|
||||
@ -255,5 +253,5 @@ latex_show_pagerefs = True
|
||||
latex_show_urls = 'footnote'
|
||||
latex_elements = {
|
||||
'papersize':'letterpaper',
|
||||
'preamble': r'\renewcommand{\pageautorefname}{%s}' % _('page'),
|
||||
'preamble': r'\renewcommand{\pageautorefname}{%s}' % _('page'), # noqa: UP031
|
||||
}
|
||||
|
@ -195,13 +195,13 @@ details and examples.
|
||||
lines = []
|
||||
for cmd in COMMANDS:
|
||||
parser = option_parser_for(cmd)()
|
||||
lines += ['.. _calibredb-%s-%s:' % (language, cmd), '']
|
||||
lines += [f'.. _calibredb-{language}-{cmd}:', '']
|
||||
lines += [cmd, '~'*20, '']
|
||||
usage = parser.usage.strip()
|
||||
usage = [i for i in usage.replace('%prog', 'calibredb').splitlines()]
|
||||
usage = usage.replace('%prog', 'calibredb').splitlines()
|
||||
cmdline = ' '+usage[0]
|
||||
usage = usage[1:]
|
||||
usage = [re.sub(r'(%s)([^a-zA-Z0-9])'%cmd, r':command:`\1`\2', i) for i in usage]
|
||||
usage = [re.sub(rf'({cmd})([^a-zA-Z0-9])', r':command:`\1`\2', i) for i in usage]
|
||||
lines += ['.. code-block:: none', '', cmdline, '']
|
||||
lines += usage
|
||||
groups = [(None, None, parser.option_list)]
|
||||
@ -240,14 +240,14 @@ def generate_ebook_convert_help(preamble, app):
|
||||
parser, plumber = create_option_parser(['ebook-convert',
|
||||
'dummyi.'+sorted(pl.file_types)[0], 'dummyo.epub', '-h'], default_log)
|
||||
groups = [(pl.name+ ' Options', '', g.option_list) for g in
|
||||
parser.option_groups if g.title == "INPUT OPTIONS"]
|
||||
parser.option_groups if g.title == 'INPUT OPTIONS']
|
||||
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
|
||||
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
|
||||
for pl in sorted(output_format_plugins(), key=lambda x: x.name):
|
||||
parser, plumber = create_option_parser(['ebook-convert', 'd.epub',
|
||||
'dummyi.'+pl.file_type, '-h'], default_log)
|
||||
groups = [(pl.name+ ' Options', '', g.option_list) for g in
|
||||
parser.option_groups if g.title == "OUTPUT OPTIONS"]
|
||||
parser.option_groups if g.title == 'OUTPUT OPTIONS']
|
||||
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
|
||||
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
|
||||
|
||||
@ -257,7 +257,7 @@ def generate_ebook_convert_help(preamble, app):
|
||||
def update_cli_doc(name, raw, language):
|
||||
if isinstance(raw, bytes):
|
||||
raw = raw.decode('utf-8')
|
||||
path = 'generated/%s/%s.rst' % (language, name)
|
||||
path = f'generated/{language}/{name}.rst'
|
||||
old_raw = open(path, encoding='utf-8').read() if os.path.exists(path) else ''
|
||||
if not os.path.exists(path) or old_raw != raw:
|
||||
import difflib
|
||||
@ -352,7 +352,7 @@ def cli_docs(language):
|
||||
usage = [mark_options(i) for i in parser.usage.replace('%prog', cmd).splitlines()]
|
||||
cmdline = usage[0]
|
||||
usage = usage[1:]
|
||||
usage = [i.replace(cmd, ':command:`%s`'%cmd) for i in usage]
|
||||
usage = [i.replace(cmd, f':command:`{cmd}`') for i in usage]
|
||||
usage = '\n'.join(usage)
|
||||
preamble = CLI_PREAMBLE.format(cmd=cmd, cmdref=cmd + '-' + language, cmdline=cmdline, usage=usage)
|
||||
if cmd == 'ebook-convert':
|
||||
@ -382,7 +382,7 @@ def template_docs(language):
|
||||
|
||||
def localized_path(app, langcode, pagename):
|
||||
href = app.builder.get_target_uri(pagename)
|
||||
href = re.sub(r'generated/[a-z]+/', 'generated/%s/' % langcode, href)
|
||||
href = re.sub(r'generated/[a-z]+/', f'generated/{langcode}/', href)
|
||||
prefix = '/'
|
||||
if langcode != 'en':
|
||||
prefix += langcode + '/'
|
||||
@ -397,7 +397,7 @@ def add_html_context(app, pagename, templatename, context, *args):
|
||||
|
||||
def guilabel_role(typ, rawtext, text, *args, **kwargs):
|
||||
from sphinx.roles import GUILabel
|
||||
text = text.replace(u'->', u'\N{THIN SPACE}\N{RIGHTWARDS ARROW}\N{THIN SPACE}')
|
||||
text = text.replace('->', '\N{THIN SPACE}\N{RIGHTWARDS ARROW}\N{THIN SPACE}')
|
||||
return GUILabel()(typ, rawtext, text, *args, **kwargs)
|
||||
|
||||
|
||||
@ -405,7 +405,7 @@ def setup_man_pages(app):
|
||||
documented_cmds = get_cli_docs()[0]
|
||||
man_pages = []
|
||||
for cmd, option_parser in documented_cmds:
|
||||
path = 'generated/%s/%s' % (app.config.language, cmd)
|
||||
path = f'generated/{app.config.language}/{cmd}'
|
||||
man_pages.append((
|
||||
path, cmd, cmd, 'Kovid Goyal', 1
|
||||
))
|
||||
|
@ -49,8 +49,8 @@ class EPUBHelpBuilder(EpubBuilder):
|
||||
imgname = container.href_to_name(img.get('src'), name)
|
||||
fmt, width, height = identify(container.raw_data(imgname))
|
||||
if width == -1:
|
||||
raise ValueError('Failed to read size of: %s' % imgname)
|
||||
img.set('style', 'width: %dpx; height: %dpx' % (width, height))
|
||||
raise ValueError(f'Failed to read size of: {imgname}')
|
||||
img.set('style', f'width: {width}px; height: {height}px')
|
||||
|
||||
def fix_opf(self, container):
|
||||
spine_names = {n for n, l in container.spine_names}
|
||||
@ -75,7 +75,7 @@ class EPUBHelpBuilder(EpubBuilder):
|
||||
|
||||
# Ensure that the cover-image property is set
|
||||
cover_id = rmap['_static/' + self.config.epub_cover[0]]
|
||||
for item in container.opf_xpath('//opf:item[@id="{}"]'.format(cover_id)):
|
||||
for item in container.opf_xpath(f'//opf:item[@id="{cover_id}"]'):
|
||||
item.set('properties', 'cover-image')
|
||||
for item in container.opf_xpath('//opf:item[@href="epub-cover.xhtml"]'):
|
||||
item.set('properties', 'svg calibre:title-page')
|
||||
|
@ -32,7 +32,7 @@ class DemoTool(Tool):
|
||||
def create_action(self, for_toolbar=True):
|
||||
# Create an action, this will be added to the plugins toolbar and
|
||||
# the plugins menu
|
||||
ac = QAction(get_icons('images/icon.png'), 'Magnify fonts', self.gui) # noqa
|
||||
ac = QAction(get_icons('images/icon.png'), 'Magnify fonts', self.gui) # noqa: F821
|
||||
if not for_toolbar:
|
||||
# Register a keyboard shortcut for this toolbar action. We only
|
||||
# register it for the action created for the menu, not the toolbar,
|
||||
|
@ -76,5 +76,3 @@ class InterfacePluginDemo(InterfaceActionBase):
|
||||
ac = self.actual_plugin_
|
||||
if ac is not None:
|
||||
ac.apply_settings()
|
||||
|
||||
|
||||
|
@ -55,7 +55,7 @@ class DemoDialog(QDialog):
|
||||
self.l.addWidget(self.view_button)
|
||||
|
||||
self.update_metadata_button = QPushButton(
|
||||
'Update metadata in a book\'s files', self)
|
||||
"Update metadata in a book's files", self)
|
||||
self.update_metadata_button.clicked.connect(self.update_metadata)
|
||||
self.l.addWidget(self.update_metadata_button)
|
||||
|
||||
|
@ -54,8 +54,8 @@ class checkbox(nodes.Element):
|
||||
def visit_checkbox(self, node):
|
||||
cid = node['ids'][0]
|
||||
node['classes'] = []
|
||||
self.body.append('<input id="{0}" type="checkbox" />'
|
||||
'<label for="{0}"> </label>'.format(cid))
|
||||
self.body.append(f'<input id="{cid}" type="checkbox" />'
|
||||
f'<label for="{cid}"> </label>')
|
||||
|
||||
|
||||
def modify_li(li):
|
||||
@ -66,7 +66,7 @@ def modify_li(li):
|
||||
li['classes'].append('leaf-node')
|
||||
else:
|
||||
c = checkbox()
|
||||
c['ids'] = ['collapse-checkbox-{}'.format(next(id_counter))]
|
||||
c['ids'] = [f'collapse-checkbox-{next(id_counter)}']
|
||||
li.insert(0, c)
|
||||
|
||||
|
||||
|
@ -89,5 +89,6 @@ def generate_template_language_help(language, log):
|
||||
a(POSTAMBLE)
|
||||
return ''.join(output)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
generate_template_language_help()
|
||||
|
@ -21,10 +21,11 @@ quote-style = 'single'
|
||||
|
||||
[tool.ruff.lint]
|
||||
ignore = ['E402', 'E722', 'E741']
|
||||
select = ['E', 'F', 'I', 'W', 'INT']
|
||||
select = ['E', 'F', 'I', 'W', 'INT', 'PIE794']
|
||||
unfixable = ['PIE794']
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"src/calibre/ebooks/unihandecode/*codepoints.py" = ['E501', 'W191']
|
||||
"src/calibre/ebooks/unihandecode/*codepoints.py" = ['E501']
|
||||
"src/qt/*.py" = ['I']
|
||||
"src/qt/*.pyi" = ['I']
|
||||
|
||||
|
@ -17,6 +17,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
use_archive = True
|
||||
|
||||
|
||||
def E(parent, name, text='', **attrs):
|
||||
ans = parent.makeelement(name, **attrs)
|
||||
ans.text = text
|
||||
@ -61,7 +62,7 @@ if use_archive:
|
||||
body = root.xpath('//body')[0]
|
||||
article = E(body, 'article')
|
||||
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
|
||||
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
|
||||
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
|
||||
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
|
||||
try:
|
||||
date = data['dateModified']
|
||||
@ -157,7 +158,7 @@ class Economist(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
|
||||
|
||||
__author__ = "Kovid Goyal"
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = (
|
||||
'Published since September 1843 to take part in “a severe contest between intelligence, which presses forward, and '
|
||||
'an unworthy, timid ignorance obstructing our progress.”'
|
||||
@ -170,7 +171,7 @@ class Economist(BasicNewsRecipe):
|
||||
resolve_internal_links = True
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
|
||||
dict(attrs={'aria-label': "Article Teaser"}),
|
||||
dict(attrs={'aria-label': 'Article Teaser'}),
|
||||
dict(attrs={
|
||||
'class': [
|
||||
'dblClkTrk', 'ec-article-info', 'share_inline_header',
|
||||
@ -224,13 +225,13 @@ class Economist(BasicNewsRecipe):
|
||||
def parse_index(self):
|
||||
# return self.economist_test_article()
|
||||
soup = self.index_to_soup('https://www.economist.com/hidden-content/1843magazine-hub')
|
||||
script_tag = soup.find("script", id="__NEXT_DATA__")
|
||||
script_tag = soup.find('script', id='__NEXT_DATA__')
|
||||
if script_tag is None:
|
||||
raise ValueError('No script tag with JSON data found in the weeklyedition archive')
|
||||
data = json.loads(script_tag.string)
|
||||
content_id = data['props']['pageProps']['content'][0]['tegID'].split('/')[-1]
|
||||
query = {
|
||||
'query': 'query HubsDataQuery($id:String!$size:Int!){canonical(ref:$id){id headline description url{canonical __typename}image{ident{url{canonical __typename}width height __typename}__typename}text(mode:"hub" format:"json")hasPart(size:$size){parts{id title:headline isPartOf{context{title:headline __typename}__typename}hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}__typename}}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa
|
||||
'query': 'query HubsDataQuery($id:String!$size:Int!){canonical(ref:$id){id headline description url{canonical __typename}image{ident{url{canonical __typename}width height __typename}__typename}text(mode:"hub" format:"json")hasPart(size:$size){parts{id title:headline isPartOf{context{title:headline __typename}__typename}hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}__typename}}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa: E501
|
||||
'operationName': 'HubsDataQuery',
|
||||
'variables': '{{"id":"/content/{}","size":40}}'.format(content_id),
|
||||
}
|
||||
@ -247,22 +248,22 @@ class Economist(BasicNewsRecipe):
|
||||
self.description = data['description']
|
||||
|
||||
feeds_dict = defaultdict(list)
|
||||
for part in safe_dict(data, "hasPart", "parts"):
|
||||
for part in safe_dict(data, 'hasPart', 'parts'):
|
||||
section = part['title']
|
||||
self.log(section)
|
||||
for art in safe_dict(part, "hasPart", "parts"):
|
||||
title = safe_dict(art, "title")
|
||||
desc = safe_dict(art, "rubric") or ''
|
||||
sub = safe_dict(art, "flyTitle") or ''
|
||||
for art in safe_dict(part, 'hasPart', 'parts'):
|
||||
title = safe_dict(art, 'title')
|
||||
desc = safe_dict(art, 'rubric') or ''
|
||||
sub = safe_dict(art, 'flyTitle') or ''
|
||||
if sub and section != sub:
|
||||
desc = sub + ' :: ' + desc
|
||||
pt = PersistentTemporaryFile('.html')
|
||||
pt.write(json.dumps(art).encode('utf-8'))
|
||||
pt.close()
|
||||
url = 'file:///' + pt.name
|
||||
feeds_dict[section].append({"title": title, "url": url, "description": desc})
|
||||
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
|
||||
self.log('\t', title, '\n\t\t', desc)
|
||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
||||
return list(feeds_dict.items())
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
article.url = soup.find('h1')['title']
|
||||
@ -311,26 +312,26 @@ class Economist(BasicNewsRecipe):
|
||||
return ans
|
||||
|
||||
def economist_parse_index(self, soup):
|
||||
script_tag = soup.find("script", id="__NEXT_DATA__")
|
||||
script_tag = soup.find('script', id='__NEXT_DATA__')
|
||||
if script_tag is not None:
|
||||
data = json.loads(script_tag.string)
|
||||
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
|
||||
self.title = safe_dict(data, "props", "pageProps", "content", "headline")
|
||||
self.title = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
|
||||
# self.cover_url = 'https://mma.prnewswire.com/media/2275620/The_Economist_The_World_Ahead_2024.jpg?w=600'
|
||||
|
||||
feeds = []
|
||||
|
||||
for coll in safe_dict(data, "props", "pageProps", "content", "collections"):
|
||||
section = safe_dict(coll, "headline") or ''
|
||||
for coll in safe_dict(data, 'props', 'pageProps', 'content', 'collections'):
|
||||
section = safe_dict(coll, 'headline') or ''
|
||||
self.log(section)
|
||||
articles = []
|
||||
for part in safe_dict(coll, "hasPart", "parts"):
|
||||
title = safe_dict(part, "headline") or ''
|
||||
url = safe_dict(part, "url", "canonical") or ''
|
||||
for part in safe_dict(coll, 'hasPart', 'parts'):
|
||||
title = safe_dict(part, 'headline') or ''
|
||||
url = safe_dict(part, 'url', 'canonical') or ''
|
||||
if not title or not url:
|
||||
continue
|
||||
desc = safe_dict(part, "description") or ''
|
||||
sub = safe_dict(part, "subheadline") or ''
|
||||
desc = safe_dict(part, 'description') or ''
|
||||
sub = safe_dict(part, 'subheadline') or ''
|
||||
if sub:
|
||||
desc = sub + ' :: ' + desc
|
||||
self.log('\t', title, '\n\t', desc, '\n\t\t', url)
|
||||
@ -341,7 +342,6 @@ class Economist(BasicNewsRecipe):
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
# open('/t/raw.html', 'wb').write(raw.encode('utf-8'))
|
||||
if use_archive:
|
||||
@ -358,9 +358,9 @@ class Economist(BasicNewsRecipe):
|
||||
cleanup_html_article(root)
|
||||
|
||||
if '/interactive/' in url:
|
||||
return '<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' \
|
||||
+ 'This article is supposed to be read in a browser' \
|
||||
+ '</em></article></body></html>'
|
||||
return ('<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>'
|
||||
'This article is supposed to be read in a browser.'
|
||||
'</em></article></body></html>')
|
||||
|
||||
for div in root.xpath('//div[@class="lazy-image"]'):
|
||||
noscript = list(div.iter('noscript'))
|
||||
|
@ -36,22 +36,21 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']}) # noqa
|
||||
dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']}) # noqa: E501
|
||||
]
|
||||
|
||||
remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']})
|
||||
remove_tags_after = dict(
|
||||
name='div', attrs={'class': ['related-news', 'col']})
|
||||
remove_tags_after = dict(name='div', attrs={'class': ['related-news', 'col']})
|
||||
|
||||
remove_tags = [
|
||||
dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict( name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa
|
||||
dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict(name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa: E501
|
||||
]
|
||||
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
p{text-align: justify; font-size: 100%}
|
||||
body{ text-align: left; font-size:100% }
|
||||
h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
||||
"""
|
||||
'''
|
||||
|
||||
preprocess_regexps = [(re.compile(
|
||||
r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
|
||||
|
@ -9,8 +9,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
def classes(classes):
|
||||
q = frozenset(classes.split(' '))
|
||||
return dict(attrs={
|
||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
class Minutes(BasicNewsRecipe):
|
||||
|
@ -28,7 +28,7 @@ class DrawAndCook(BasicNewsRecipe):
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
for title, url in [
|
||||
("They Draw and Cook", "http://www.theydrawandcook.com/")
|
||||
('They Draw and Cook', 'http://www.theydrawandcook.com/')
|
||||
]:
|
||||
articles = self.make_links(url)
|
||||
if articles:
|
||||
|
@ -5,11 +5,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheMITPressReader(BasicNewsRecipe):
|
||||
title = "The MIT Press Reader"
|
||||
title = 'The MIT Press Reader'
|
||||
__author__ = 'yodha8'
|
||||
language = 'en'
|
||||
description = ("Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors."
|
||||
" This recipe pulls articles from the past 7 days.")
|
||||
description = ('Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors.'
|
||||
' This recipe pulls articles from the past 7 days.')
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
@ -47,13 +47,13 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
|
||||
if d and isinstance(d, str):
|
||||
self.oldest_article = float(d)
|
||||
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
p{text-align: justify; font-size: 100%}
|
||||
body{ text-align: left; font-size:100% }
|
||||
h3{font-family: sans-serif; font-size:120%; font-weight:bold; text-align: justify; }
|
||||
h2{font-family: sans-serif; font-size:100%; font-weight:bold; text-align: justify; }
|
||||
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
||||
"""
|
||||
'''
|
||||
|
||||
feeds = [
|
||||
|
||||
|
@ -28,6 +28,6 @@ class Acrimed(BasicNewsRecipe):
|
||||
lambda m: '<title>' + m.group(1) + '</title>'),
|
||||
(re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]
|
||||
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
.chapo{font-style:italic; margin: 1em 0 0.5em}
|
||||
"""
|
||||
'''
|
||||
|
@ -85,9 +85,10 @@ class ADRecipe(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
|
||||
parts = url.split('/')
|
||||
print_url = 'http://' + parts[2] + '/' + parts[3] + '/' + parts[4] + '/' + parts[5] + '/' \
|
||||
+ parts[10] + '/' + parts[7] + '/print/' + \
|
||||
parts[8] + '/' + parts[9] + '/' + parts[13]
|
||||
print_url = 'http://' + '/'.join([
|
||||
parts[2], parts[3], parts[4], parts[5], parts[10],
|
||||
parts[7], 'print', parts[8], parts[9], parts[13],
|
||||
])
|
||||
|
||||
return print_url
|
||||
|
||||
|
@ -33,7 +33,7 @@ class Adevarul(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'}) # noqa
|
||||
dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'}) # noqa: E501
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
|
@ -21,7 +21,7 @@ class AdventureGamers(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
INDEX = u'http://www.adventuregamers.com'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
|
||||
.pageheader_title,.page_title{font-size: xx-large; color: #394128}
|
||||
.pageheader_byline{font-size: small; font-weight: bold; color: #394128}
|
||||
@ -32,7 +32,7 @@ class AdventureGamers(BasicNewsRecipe):
|
||||
.score_header{font-size: large; color: #50544A}
|
||||
img{margin-bottom: 1em;}
|
||||
body{font-family: 'Open Sans',Helvetica,Arial,sans-serif}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
|
@ -8,13 +8,14 @@ def absurl(url):
|
||||
if url.startswith('/'):
|
||||
return 'https://www.afr.com' + url
|
||||
|
||||
|
||||
class afr(BasicNewsRecipe):
|
||||
title = 'Australian Financial Review'
|
||||
__author__ = 'unkn0wn'
|
||||
description = (
|
||||
'For more than 65 years The Australian Financial Review has been the authority on business,'
|
||||
' finance and investment news in Australia. It has a reputation for independent, award-winning '
|
||||
'journalism and is essential reading for Australia\'s business and investor community.'
|
||||
"journalism and is essential reading for Australia's business and investor community."
|
||||
)
|
||||
masthead_url = 'https://www.nineforbrands.com.au/wp-content/uploads/2020/08/AFR-DHOSP-Logo-black-RGB.png'
|
||||
encoding = 'utf-8'
|
||||
@ -24,7 +25,6 @@ class afr(BasicNewsRecipe):
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 25
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
remove_attributes = ['style', 'height', 'width']
|
||||
|
||||
keep_only_tags = [
|
||||
|
@ -36,9 +36,9 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
|
||||
'''
|
||||
|
||||
def default_cover(self, cover_file):
|
||||
"""
|
||||
'''
|
||||
Crée une couverture personnalisée avec le logo
|
||||
"""
|
||||
'''
|
||||
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
|
||||
|
||||
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
|
||||
@ -54,7 +54,7 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
|
||||
|
||||
weekday = french_weekday[wkd]
|
||||
month = french_month[today.month]
|
||||
date_str = f"{weekday} {today.day} {month} {today.year}"
|
||||
date_str = f'{weekday} {today.day} {month} {today.year}'
|
||||
edition = today.strftime('Édition de %Hh')
|
||||
|
||||
# Image de base
|
||||
|
@ -6,7 +6,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Agents(BasicNewsRecipe):
|
||||
title = u'\u00AB\u0410\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E\u00BB'
|
||||
description = u'\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u0430\u043D\u043D\u043E\u0435 \u0432\u044B\u0445\u043E\u0434\u0446\u0430\u043C\u0438 \u0438\u0437 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432\u0430 \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u043C\u043E\u0433\u043E \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u041F\u0440\u043E\u0435\u043A\u0442\u00BB.' # noqa
|
||||
description = u'\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u0430\u043D\u043D\u043E\u0435 \u0432\u044B\u0445\u043E\u0434\u0446\u0430\u043C\u0438 \u0438\u0437 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432\u0430 \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u043C\u043E\u0433\u043E \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u041F\u0440\u043E\u0435\u043A\u0442\u00BB.' # noqa: E501
|
||||
__author__ = 'bugmen00t'
|
||||
publisher = 'Project Media'
|
||||
publication_type = 'news'
|
||||
|
@ -21,9 +21,9 @@ class AlJazeera(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
body{font-family: Arial,sans-serif}
|
||||
"""
|
||||
'''
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category,
|
||||
'publisher': publisher, 'language': language
|
||||
|
@ -22,7 +22,7 @@ class AlMasryAlyoum(BasicNewsRecipe):
|
||||
category = 'News'
|
||||
publication_type = 'newsportal'
|
||||
|
||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } ' # noqa
|
||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } ' # noqa: E501
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class': ['article']})
|
||||
|
@ -110,7 +110,7 @@ class AlMonitor(BasicNewsRecipe):
|
||||
title = title[0:120] + '...'
|
||||
href = link.get('href')
|
||||
if not href:
|
||||
self._p("BAD HREF: " + str(link))
|
||||
self._p('BAD HREF: ' + str(link))
|
||||
return
|
||||
self.queue_article_link(section, href, title)
|
||||
|
||||
@ -158,7 +158,7 @@ class AlMonitor(BasicNewsRecipe):
|
||||
|
||||
age = (datetime.datetime.now() - date).days
|
||||
if (age > self.oldest_article):
|
||||
return "too old"
|
||||
return 'too old'
|
||||
return False
|
||||
|
||||
def scrape_article_date(self, soup):
|
||||
@ -174,7 +174,7 @@ class AlMonitor(BasicNewsRecipe):
|
||||
def date_from_string(self, datestring):
|
||||
try:
|
||||
# eg: Posted September 17, 2014
|
||||
dt = datetime.datetime.strptime(datestring, "Posted %B %d, %Y")
|
||||
dt = datetime.datetime.strptime(datestring, 'Posted %B %d, %Y')
|
||||
except:
|
||||
dt = None
|
||||
|
||||
@ -203,11 +203,10 @@ class AlMonitor(BasicNewsRecipe):
|
||||
return self.tag_to_string(n).strip()
|
||||
|
||||
def _dbg_soup_node(self, node):
|
||||
s = ' cls: ' + str(node.get('class')).strip() + \
|
||||
' id: ' + str(node.get('id')).strip() + \
|
||||
' role: ' + str(node.get('role')).strip() + \
|
||||
' txt: ' + self.text(node)
|
||||
return s
|
||||
return (' cls: ' + str(node.get('class')).strip() +
|
||||
' id: ' + str(node.get('id')).strip() +
|
||||
' role: ' + str(node.get('role')).strip() +
|
||||
' txt: ' + self.text(node))
|
||||
|
||||
def _p(self, msg):
|
||||
curframe = inspect.currentframe()
|
||||
|
@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AlbertMohlersBlog(BasicNewsRecipe):
|
||||
title = u'Albert Mohler\'s Blog'
|
||||
title = u"Albert Mohler's Blog"
|
||||
__author__ = 'Peter Grungi'
|
||||
language = 'en'
|
||||
oldest_article = 90
|
||||
@ -13,8 +13,7 @@ class AlbertMohlersBlog(BasicNewsRecipe):
|
||||
auto_cleanup = True
|
||||
cover_url = 'http://www.albertmohler.com/wp-content/themes/albert-mohler-v5/img/logo-am-lg.gif'
|
||||
publisher = 'Albert Mohler'
|
||||
language = 'en'
|
||||
author = 'Albert Mohler'
|
||||
|
||||
feeds = [(u'Albert Mohler\'s Blog',
|
||||
feeds = [(u"Albert Mohler's Blog",
|
||||
u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]
|
||||
|
@ -43,7 +43,7 @@ class ALD(BasicNewsRecipe):
|
||||
# Extract a list of dates from the page.
|
||||
# Subset this out to the list of target dates for extraction.
|
||||
date_list = []
|
||||
for div in soup.findAll('div', attrs={'id': "dayheader"}):
|
||||
for div in soup.findAll('div', attrs={'id': 'dayheader'}):
|
||||
date_list.append(self.tag_to_string(div))
|
||||
date_list_clean = [re.sub(r'[^\w]', ' ', date) for date in date_list]
|
||||
date_list_bool = [
|
||||
@ -54,14 +54,14 @@ class ALD(BasicNewsRecipe):
|
||||
|
||||
# Process each paragraph one by one.
|
||||
# Stop when the text of the previous div is not in the target date list.
|
||||
for div in soup.findAll('div', attrs={'class': "mobile-front"}):
|
||||
for div in soup.findAll('div', attrs={'class': 'mobile-front'}):
|
||||
for p in div.findAll('p'):
|
||||
if self.tag_to_string(p.findPreviousSibling('div')) in compress_date:
|
||||
if p.find('a'):
|
||||
title = self.tag_to_string(p)
|
||||
link = p.find('a')['href']
|
||||
if self.tag_to_string(p.findPreviousSibling('h3')
|
||||
) == "Articles of Note":
|
||||
) == 'Articles of Note':
|
||||
articles_note.append({
|
||||
'title': title,
|
||||
'url': link,
|
||||
@ -69,7 +69,7 @@ class ALD(BasicNewsRecipe):
|
||||
'date': ''
|
||||
})
|
||||
elif self.tag_to_string(p.findPreviousSibling('h3')
|
||||
) == "New Books":
|
||||
) == 'New Books':
|
||||
new_books.append({
|
||||
'title': title,
|
||||
'url': link,
|
||||
|
@ -29,6 +29,6 @@ class AlejaKomiksu(BasicNewsRecipe):
|
||||
def skip_ad_pages(self, soup):
|
||||
tag = soup.find(attrs={'class': 'rodzaj'})
|
||||
if tag and tag.a.string.lower().strip() == 'recenzje':
|
||||
link = soup.find(text=re.compile('recenzuje'))
|
||||
link = soup.find(text=re.compile(r'recenzuje'))
|
||||
if link:
|
||||
return self.index_to_soup(link.parent['href'], raw=True)
|
||||
|
@ -21,7 +21,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
|
||||
remove_images = False
|
||||
|
||||
def get_cover_url(self):
|
||||
"""Récupère dynamiquement l'URL de la dernière une depuis MLP"""
|
||||
'''Récupère dynamiquement l'URL de la dernière une depuis MLP'''
|
||||
br = self.get_browser()
|
||||
try:
|
||||
# Accéder à la page du magazine sur MLP
|
||||
@ -38,7 +38,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
|
||||
self.log('Cover URL found:', cover_url)
|
||||
return cover_url
|
||||
|
||||
self.log('Aucune couverture trouvée, utilisation de l\'image par défaut')
|
||||
self.log("Aucune couverture trouvée, utilisation de l'image par défaut")
|
||||
return 'https://www.alternatives-economiques.fr/sites/all/themes/alternatives-economiques-main/assets/logo-alternatives-economiques.svg'
|
||||
|
||||
except Exception as e:
|
||||
@ -92,7 +92,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
|
||||
display_name = section_name.replace('-', ' ').title()
|
||||
articles.append((display_name, feed_articles[:self.max_articles_per_feed]))
|
||||
except Exception as e:
|
||||
self.log.error(f'Error processing {section_name}: {str(e)}')
|
||||
self.log.error(f'Error processing {section_name}: {e}')
|
||||
continue
|
||||
|
||||
return articles
|
||||
@ -133,7 +133,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
|
||||
'description': ''
|
||||
})
|
||||
except Exception as e:
|
||||
self.log.error(f'Error getting H1 title for {article_url}: {str(e)}')
|
||||
self.log.error(f'Error getting H1 title for {article_url}: {e}')
|
||||
continue
|
||||
|
||||
return feed_articles
|
||||
|
@ -21,11 +21,9 @@ class WwwAltomdata_dk(BasicNewsRecipe):
|
||||
resolve_internal_links = True
|
||||
remove_empty_feeds = True
|
||||
auto_cleanup = True
|
||||
language = 'da'
|
||||
|
||||
feeds = [
|
||||
('Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/feed'),
|
||||
('Kommentarer til Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/comments/feed'),
|
||||
|
||||
]
|
||||
|
||||
|
@ -34,7 +34,7 @@ class AM730(BasicNewsRecipe):
|
||||
description = 'http://www.am730.com.hk'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
masthead_url = 'https://upload.wikimedia.org/wikipedia/en/5/58/Am730_Hong_Kong_newspaper_logo.png'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa: E501
|
||||
remove_tags =[dict(name='div',attrs={'class':'col-xs-12 col-sm-1 col-md-1 share-button'}),
|
||||
dict(name='div',attrs={'class':'logo-container print-logo'}),
|
||||
dict(name='div',attrs={'id':'galleria'})]
|
||||
@ -53,12 +53,12 @@ class AM730(BasicNewsRecipe):
|
||||
return self.masthead_url
|
||||
|
||||
def getAMSectionArticles(self, sectionName,url):
|
||||
# print sectionName
|
||||
# print(sectionName)
|
||||
soup = self.index_to_soup(url)
|
||||
articles = []
|
||||
for aTag in soup.findAll('a',attrs={'class':'newsimglink'}):
|
||||
href = aTag.get('href',False)
|
||||
if not href.encode("utf-8").startswith(url.encode("utf-8")) :
|
||||
if not href.encode('utf-8').startswith(url.encode('utf-8')):
|
||||
continue # not in same section
|
||||
|
||||
title = href.split('/')[-1].split('-')[0]
|
||||
@ -67,7 +67,7 @@ class AM730(BasicNewsRecipe):
|
||||
print(title)
|
||||
try:
|
||||
if articles.index({'title':title,'url':href})>=0:
|
||||
# print 'already added'
|
||||
# print('already added')
|
||||
continue # already added
|
||||
except:
|
||||
pass
|
||||
@ -78,7 +78,7 @@ class AM730(BasicNewsRecipe):
|
||||
break
|
||||
if self.debug:
|
||||
print(articles)
|
||||
return (sectionName,articles)
|
||||
return sectionName, articles
|
||||
|
||||
def parse_index(self):
|
||||
# hard code sections
|
||||
@ -91,7 +91,7 @@ class AM730(BasicNewsRecipe):
|
||||
('旅遊.飲食','https://www.am730.com.hk/news/%E6%97%85%E9%81%8A.%E9%A3%B2%E9%A3%9F')
|
||||
] # articles =[]
|
||||
SectionsArticles=[]
|
||||
for (title, url) in Sections:
|
||||
for title, url in Sections:
|
||||
if self.debug:
|
||||
print(title)
|
||||
print(url)
|
||||
|
@ -28,9 +28,9 @@ class Ambito(BasicNewsRecipe):
|
||||
language = 'es_AR'
|
||||
publication_type = 'newsportal'
|
||||
masthead_url = 'https://www.ambito.com/css-custom/239/images/logo-239-2020v2.svg'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
body{font-family: Roboto, sans-serif}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description,
|
||||
|
@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AmericanThinker(BasicNewsRecipe):
|
||||
title = u'American Thinker'
|
||||
description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans."
|
||||
description = 'American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans.'
|
||||
__author__ = 'Walt Anthony'
|
||||
publisher = 'Thomas Lifson'
|
||||
category = 'news, politics, USA'
|
||||
@ -33,7 +33,7 @@ class AmericanThinker(BasicNewsRecipe):
|
||||
root = html5lib.parse(
|
||||
clean_xml_chars(raw), treebuilder='lxml',
|
||||
namespaceHTMLElements=False)
|
||||
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa
|
||||
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa: E501
|
||||
x.getparent().remove(x)
|
||||
return etree.tostring(root, encoding='unicode')
|
||||
|
||||
|
@ -39,4 +39,4 @@ class anan(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
|
||||
# return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update
|
||||
return url.replace("/show/", "/print/") # 2014-02-27 AGE: update
|
||||
return url.replace('/show/', '/print/') # 2014-02-27 AGE: update
|
||||
|
@ -12,7 +12,7 @@ class ancientegypt(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
__author__ = 'unkn0wn'
|
||||
description = (
|
||||
'Ancient Egypt is the world\'s leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. '
|
||||
"Ancient Egypt is the world's leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. "
|
||||
'Now in a larger format with a fresh new design, AE brings you the latest news and discoveries, and feature articles covering '
|
||||
'more than 5000 years of Egyptian history. Published bimonthly.'
|
||||
)
|
||||
|
@ -18,6 +18,7 @@ today = today.replace('/', '%2F')
|
||||
|
||||
index = 'https://epaper.andhrajyothy.com'
|
||||
|
||||
|
||||
class andhra(BasicNewsRecipe):
|
||||
title = 'ఆంధ్రజ్యోతి - ఆంధ్రప్రదేశ్'
|
||||
language = 'te'
|
||||
@ -75,8 +76,8 @@ class andhra(BasicNewsRecipe):
|
||||
url = str(snaps['OrgId'])
|
||||
if snaps['ObjectType'] == 4:
|
||||
continue
|
||||
feeds_dict[section].append({"title": '', "url": url})
|
||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
||||
feeds_dict[section].append({'title': '', 'url': url})
|
||||
return list(feeds_dict.items())
|
||||
|
||||
def preprocess_raw_html(self, raw, *a):
|
||||
data = json.loads(raw)
|
||||
|
@ -18,6 +18,7 @@ today = today.replace('/', '%2F')
|
||||
|
||||
index = 'https://epaper.andhrajyothy.com'
|
||||
|
||||
|
||||
class andhra(BasicNewsRecipe):
|
||||
title = 'ఆంధ్రజ్యోతి - తెలంగాణ'
|
||||
language = 'te'
|
||||
@ -75,8 +76,8 @@ class andhra(BasicNewsRecipe):
|
||||
url = str(snaps['OrgId'])
|
||||
if snaps['ObjectType'] == 4:
|
||||
continue
|
||||
feeds_dict[section].append({"title": '', "url": url})
|
||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
||||
feeds_dict[section].append({'title': '', 'url': url})
|
||||
return list(feeds_dict.items())
|
||||
|
||||
def preprocess_raw_html(self, raw, *a):
|
||||
data = json.loads(raw)
|
||||
|
@ -16,5 +16,5 @@ class Android_com_pl(BasicNewsRecipe):
|
||||
remove_tags_after = [{'class': 'post-content'}]
|
||||
remove_tags = [dict(name='ul', attrs={'class': 'tags small-tags'}), dict(name='a', attrs={'onclick': 'return ss_plugin_loadpopup_js(this);'})]
|
||||
preprocess_regexps = [
|
||||
(re.compile(u'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
|
||||
(re.compile(r'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
|
||||
feeds = [(u'Android', u'http://android.com.pl/feed/')]
|
||||
|
@ -32,13 +32,10 @@ class AnimalPolitico(BasicNewsRecipe):
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup('http://www.animalpolitico.com/')
|
||||
articles = []
|
||||
for a in soup(**{
|
||||
'name': 'a',
|
||||
'attrs': {
|
||||
for a in soup(name='a', attrs={
|
||||
'href': True, 'title': True,
|
||||
'data-author': True, 'data-type': True,
|
||||
'data-home-title': True
|
||||
}
|
||||
}):
|
||||
title = a['title']
|
||||
url = a['href']
|
||||
|
@ -19,8 +19,6 @@ class AmericanProspect(BasicNewsRecipe):
|
||||
|
||||
use_embedded_content = False
|
||||
|
||||
no_stylesheets = True
|
||||
|
||||
keep_only_tags = [
|
||||
dict(id=['title', 'content']),
|
||||
]
|
||||
|
@ -18,8 +18,6 @@ class Arbetaren_SE(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
language = 'sv'
|
||||
auto_cleanup = True
|
||||
auto_cleanup_keep = '//div[@class="thumbnail"]'
|
||||
auto_cleanup_keep = '//div[@id="article-image"]'
|
||||
auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]'
|
||||
auto_cleanup_keep = '//div[@class="thumbnail"]|//div[@id="article-image"]|//span[@class="important"]'
|
||||
|
||||
feeds = [(u'Nyheter', u'https://www.arbetaren.se/feed')]
|
||||
|
@ -66,19 +66,19 @@ class Arcamax(BasicNewsRecipe):
|
||||
# (u"9 Chickweed Lane", u"https://www.arcamax.com/thefunnies/ninechickweedlane"),
|
||||
# (u"Agnes", u"https://www.arcamax.com/thefunnies/agnes"),
|
||||
# (u"Andy Capp", u"https://www.arcamax.com/thefunnies/andycapp"),
|
||||
(u"BC", u"https://www.arcamax.com/thefunnies/bc"),
|
||||
(u'BC', u'https://www.arcamax.com/thefunnies/bc'),
|
||||
# (u"Baby Blues", u"https://www.arcamax.com/thefunnies/babyblues"),
|
||||
# (u"Beetle Bailey", u"https://www.arcamax.com/thefunnies/beetlebailey"),
|
||||
(u"Blondie", u"https://www.arcamax.com/thefunnies/blondie"),
|
||||
(u'Blondie', u'https://www.arcamax.com/thefunnies/blondie'),
|
||||
# u"Boondocks", u"https://www.arcamax.com/thefunnies/boondocks"),
|
||||
# (u"Cathy", u"https://www.arcamax.com/thefunnies/cathy"),
|
||||
# (u"Daddys Home", u"https://www.arcamax.com/thefunnies/daddyshome"),
|
||||
# (u"Dinette Set", u"https://www.arcamax.com/thefunnies/thedinetteset"),
|
||||
(u"Dog Eat Doug", u"https://www.arcamax.com/thefunnies/dogeatdoug"),
|
||||
(u'Dog Eat Doug', u'https://www.arcamax.com/thefunnies/dogeatdoug'),
|
||||
# (u"Doonesbury", u"https://www.arcamax.com/thefunnies/doonesbury"),
|
||||
# (u"Dustin", u"https://www.arcamax.com/thefunnies/dustin"),
|
||||
(u"Family Circus", u"https://www.arcamax.com/thefunnies/familycircus"),
|
||||
(u"Garfield", u"https://www.arcamax.com/thefunnies/garfield"),
|
||||
(u'Family Circus', u'https://www.arcamax.com/thefunnies/familycircus'),
|
||||
(u'Garfield', u'https://www.arcamax.com/thefunnies/garfield'),
|
||||
# (u"Get Fuzzy", u"https://www.arcamax.com/thefunnies/getfuzzy"),
|
||||
# (u"Girls and Sports", u"https://www.arcamax.com/thefunnies/girlsandsports"),
|
||||
# (u"Hagar the Horrible", u"https://www.arcamax.com/thefunnies/hagarthehorrible"),
|
||||
@ -87,16 +87,16 @@ class Arcamax(BasicNewsRecipe):
|
||||
# (u"Luann", u"https://www.arcamax.com/thefunnies/luann"),
|
||||
# (u"Momma", u"https://www.arcamax.com/thefunnies/momma"),
|
||||
# (u"Mother Goose and Grimm", u"https://www.arcamax.com/thefunnies/mothergooseandgrimm"),
|
||||
(u"Mutts", u"https://www.arcamax.com/thefunnies/mutts"),
|
||||
(u'Mutts', u'https://www.arcamax.com/thefunnies/mutts'),
|
||||
# (u"Non Sequitur", u"https://www.arcamax.com/thefunnies/nonsequitur"),
|
||||
# (u"Pearls Before Swine", u"https://www.arcamax.com/thefunnies/pearlsbeforeswine"),
|
||||
# (u"Pickles", u"https://www.arcamax.com/thefunnies/pickles"),
|
||||
# (u"Red and Rover", u"https://www.arcamax.com/thefunnies/redandrover"),
|
||||
# (u"Rubes", u"https://www.arcamax.com/thefunnies/rubes"),
|
||||
# (u"Rugrats", u"https://www.arcamax.com/thefunnies/rugrats"),
|
||||
(u"Speed Bump", u"https://www.arcamax.com/thefunnies/speedbump"),
|
||||
(u"Wizard of Id", u"https://www.arcamax.com/thefunnies/wizardofid"),
|
||||
(u"Zits", u"https://www.arcamax.com/thefunnies/zits"),
|
||||
(u'Speed Bump', u'https://www.arcamax.com/thefunnies/speedbump'),
|
||||
(u'Wizard of Id', u'https://www.arcamax.com/thefunnies/wizardofid'),
|
||||
(u'Zits', u'https://www.arcamax.com/thefunnies/zits'),
|
||||
]:
|
||||
self.log('Finding strips for:', title)
|
||||
articles = self.make_links(url, title)
|
||||
|
@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ArretSurImages(BasicNewsRecipe):
|
||||
title = 'Arrêt sur Images'
|
||||
description = 'Site français d\'analyse des médias'
|
||||
description = "Site français d'analyse des médias"
|
||||
language = 'fr'
|
||||
encoding = 'utf-8'
|
||||
needs_subscription = True
|
||||
@ -27,9 +27,9 @@ class ArretSurImages(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def default_cover(self, cover_file):
|
||||
"""
|
||||
'''
|
||||
Crée une couverture personnalisée avec le logo ASI
|
||||
"""
|
||||
'''
|
||||
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
|
||||
|
||||
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
|
||||
@ -45,7 +45,7 @@ class ArretSurImages(BasicNewsRecipe):
|
||||
|
||||
weekday = french_weekday[wkd]
|
||||
month = french_month[today.month]
|
||||
date_str = f"{weekday} {today.day} {month} {today.year}"
|
||||
date_str = f'{weekday} {today.day} {month} {today.year}'
|
||||
edition = today.strftime('Édition de %Hh')
|
||||
|
||||
img = QImage(1400, 1920, QImage.Format_RGB888)
|
||||
@ -123,9 +123,9 @@ class ArretSurImages(BasicNewsRecipe):
|
||||
br.addheaders += [('Authorization', f'Bearer {auth_response["access_token"]}')]
|
||||
print('Authentification réussie')
|
||||
else:
|
||||
print('Échec de l\'authentification - Vérifiez vos identifiants')
|
||||
print("Échec de l'authentification - Vérifiez vos identifiants")
|
||||
except Exception as e:
|
||||
print(f'Erreur lors de l\'authentification: {str(e)}')
|
||||
print(f"Erreur lors de l'authentification: {e}")
|
||||
return br
|
||||
|
||||
def get_article_url(self, article):
|
||||
@ -162,7 +162,7 @@ class ArretSurImages(BasicNewsRecipe):
|
||||
</html>
|
||||
'''
|
||||
except Exception as e:
|
||||
print(f'Erreur preprocessing HTML: {str(e)}')
|
||||
print(f'Erreur preprocessing HTML: {e}')
|
||||
return raw_html
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
@ -186,11 +186,11 @@ class ArretSurImages(BasicNewsRecipe):
|
||||
else:
|
||||
tag.replace_with(img_tag)
|
||||
except Exception as e:
|
||||
print(f'Erreur processing image: {str(e)}')
|
||||
print(f'Erreur processing image: {e}')
|
||||
tag.decompose()
|
||||
else:
|
||||
tag.decompose()
|
||||
return soup
|
||||
except Exception as e:
|
||||
print(f'Erreur preprocessing HTML: {str(e)}')
|
||||
print(f'Erreur preprocessing HTML: {e}')
|
||||
return soup
|
||||
|
@ -1,12 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = "GPL v3"
|
||||
__copyright__ = "2022, Albert Aparicio Isarn <aaparicio at posteo.net>"
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2022, Albert Aparicio Isarn <aaparicio at posteo.net>'
|
||||
|
||||
"""
|
||||
'''
|
||||
https://www.asahi.com/ajw/
|
||||
"""
|
||||
'''
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
@ -14,99 +14,99 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AsahiShimbunEnglishNews(BasicNewsRecipe):
|
||||
title = "The Asahi Shimbun"
|
||||
__author__ = "Albert Aparicio Isarn"
|
||||
title = 'The Asahi Shimbun'
|
||||
__author__ = 'Albert Aparicio Isarn'
|
||||
|
||||
description = ("The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan."
|
||||
" The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive"
|
||||
" coverage of cool Japan,focusing on manga, travel and other timely news.")
|
||||
publisher = "The Asahi Shimbun Company"
|
||||
publication_type = "newspaper"
|
||||
category = "news, japan"
|
||||
language = "en_JP"
|
||||
description = ('The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan.'
|
||||
' The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive'
|
||||
' coverage of cool Japan,focusing on manga, travel and other timely news.')
|
||||
publisher = 'The Asahi Shimbun Company'
|
||||
publication_type = 'newspaper'
|
||||
category = 'news, japan'
|
||||
language = 'en_JP'
|
||||
|
||||
index = "https://www.asahi.com"
|
||||
masthead_url = "https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png"
|
||||
index = 'https://www.asahi.com'
|
||||
masthead_url = 'https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png'
|
||||
|
||||
oldest_article = 3
|
||||
max_articles_per_feed = 40
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
remove_tags_before = {"id": "MainInner"}
|
||||
remove_tags_after = {"class": "ArticleText"}
|
||||
remove_tags = [{"name": "div", "class": "SnsUtilityArea"}]
|
||||
remove_tags_before = {'id': 'MainInner'}
|
||||
remove_tags_after = {'class': 'ArticleText'}
|
||||
remove_tags = [{'name': 'div', 'class': 'SnsUtilityArea'}]
|
||||
|
||||
def get_whats_new(self):
|
||||
soup = self.index_to_soup(self.index + "/ajw/new")
|
||||
news_section = soup.find("div", attrs={"class": "specialList"})
|
||||
soup = self.index_to_soup(self.index + '/ajw/new')
|
||||
news_section = soup.find('div', attrs={'class': 'specialList'})
|
||||
|
||||
new_news = []
|
||||
|
||||
for item in news_section.findAll("li"):
|
||||
title = item.find("p", attrs={"class": "title"}).string
|
||||
date_string = item.find("p", attrs={"class": "date"}).next
|
||||
for item in news_section.findAll('li'):
|
||||
title = item.find('p', attrs={'class': 'title'}).string
|
||||
date_string = item.find('p', attrs={'class': 'date'}).next
|
||||
date = date_string.strip()
|
||||
url = self.index + item.find("a")["href"]
|
||||
url = self.index + item.find('a')['href']
|
||||
|
||||
new_news.append(
|
||||
{
|
||||
"title": title,
|
||||
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
|
||||
"url": url,
|
||||
"description": "",
|
||||
'title': title,
|
||||
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
|
||||
'url': url,
|
||||
'description': '',
|
||||
}
|
||||
)
|
||||
|
||||
return new_news
|
||||
|
||||
def get_top6(self, soup):
|
||||
top = soup.find("ul", attrs={"class": "top6"})
|
||||
top = soup.find('ul', attrs={'class': 'top6'})
|
||||
|
||||
top6_news = []
|
||||
|
||||
for item in top.findAll("li"):
|
||||
title = item.find("p", attrs={"class": "title"}).string
|
||||
date_string = item.find("p", attrs={"class": "date"}).next
|
||||
for item in top.findAll('li'):
|
||||
title = item.find('p', attrs={'class': 'title'}).string
|
||||
date_string = item.find('p', attrs={'class': 'date'}).next
|
||||
date = date_string.strip()
|
||||
url = self.index + item.find("a")["href"]
|
||||
url = self.index + item.find('a')['href']
|
||||
|
||||
top6_news.append(
|
||||
{
|
||||
"title": title,
|
||||
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
|
||||
"url": url,
|
||||
"description": "",
|
||||
'title': title,
|
||||
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
|
||||
'url': url,
|
||||
'description': '',
|
||||
}
|
||||
)
|
||||
|
||||
return top6_news
|
||||
|
||||
def get_section_news(self, soup):
|
||||
news_grid = soup.find("ul", attrs={"class": "default"})
|
||||
news_grid = soup.find('ul', attrs={'class': 'default'})
|
||||
|
||||
news = []
|
||||
|
||||
for item in news_grid.findAll("li"):
|
||||
title = item.find("p", attrs={"class": "title"}).string
|
||||
date_string = item.find("p", attrs={"class": "date"}).next
|
||||
for item in news_grid.findAll('li'):
|
||||
title = item.find('p', attrs={'class': 'title'}).string
|
||||
date_string = item.find('p', attrs={'class': 'date'}).next
|
||||
date = date_string.strip()
|
||||
|
||||
url = self.index + item.find("a")["href"]
|
||||
url = self.index + item.find('a')['href']
|
||||
|
||||
news.append(
|
||||
{
|
||||
"title": title,
|
||||
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
|
||||
"url": url,
|
||||
"description": "",
|
||||
'title': title,
|
||||
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
|
||||
'url': url,
|
||||
'description': '',
|
||||
}
|
||||
)
|
||||
|
||||
return news
|
||||
|
||||
def get_section(self, section):
|
||||
soup = self.index_to_soup(self.index + "/ajw/" + section)
|
||||
soup = self.index_to_soup(self.index + '/ajw/' + section)
|
||||
|
||||
section_news_items = self.get_top6(soup)
|
||||
section_news_items.extend(self.get_section_news(soup))
|
||||
@ -114,26 +114,26 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
|
||||
return section_news_items
|
||||
|
||||
def get_special_section(self, section):
|
||||
soup = self.index_to_soup(self.index + "/ajw/" + section)
|
||||
top = soup.find("div", attrs={"class": "Section"})
|
||||
soup = self.index_to_soup(self.index + '/ajw/' + section)
|
||||
top = soup.find('div', attrs={'class': 'Section'})
|
||||
|
||||
special_news = []
|
||||
|
||||
for item in top.findAll("li"):
|
||||
item_a = item.find("a")
|
||||
for item in top.findAll('li'):
|
||||
item_a = item.find('a')
|
||||
|
||||
text_split = item_a.text.strip().split("\n")
|
||||
text_split = item_a.text.strip().split('\n')
|
||||
title = text_split[0]
|
||||
description = text_split[1].strip()
|
||||
|
||||
url = self.index + item_a["href"]
|
||||
url = self.index + item_a['href']
|
||||
|
||||
special_news.append(
|
||||
{
|
||||
"title": title,
|
||||
"date": "",
|
||||
"url": url,
|
||||
"description": description,
|
||||
'title': title,
|
||||
'date': '',
|
||||
'url': url,
|
||||
'description': description,
|
||||
}
|
||||
)
|
||||
|
||||
@ -144,24 +144,24 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
|
||||
|
||||
feeds = [
|
||||
("What's New", self.get_whats_new()),
|
||||
("National Report", self.get_section("national_report")),
|
||||
("Politics", self.get_section("politics")),
|
||||
("Business", self.get_section("business")),
|
||||
("Asia & World - China", self.get_section("asia_world/china")),
|
||||
("Asia & World - Korean Peninsula", self.get_section("asia_world/korean_peninsula")),
|
||||
("Asia & World - Around Asia", self.get_section("asia_world/around_asia")),
|
||||
("Asia & World - World", self.get_section("asia_world/world")),
|
||||
("Sci & Tech", self.get_section("sci_tech")),
|
||||
("Culture - Style", self.get_section("culture/style")),
|
||||
('National Report', self.get_section('national_report')),
|
||||
('Politics', self.get_section('politics')),
|
||||
('Business', self.get_section('business')),
|
||||
('Asia & World - China', self.get_section('asia_world/china')),
|
||||
('Asia & World - Korean Peninsula', self.get_section('asia_world/korean_peninsula')),
|
||||
('Asia & World - Around Asia', self.get_section('asia_world/around_asia')),
|
||||
('Asia & World - World', self.get_section('asia_world/world')),
|
||||
('Sci & Tech', self.get_section('sci_tech')),
|
||||
('Culture - Style', self.get_section('culture/style')),
|
||||
# ("Culture - Cooking", self.get_section("culture/cooking")),
|
||||
("Culture - Movies", self.get_section("culture/movies")),
|
||||
("Culture - Manga & Anime", self.get_section("culture/manga_anime")),
|
||||
("Travel", self.get_section("travel")),
|
||||
("Sports", self.get_section("sports")),
|
||||
("Opinion - Editorial", self.get_section("opinion/editorial")),
|
||||
("Opinion - Vox Populi", self.get_section("opinion/vox")),
|
||||
("Opinion - Views", self.get_section("opinion/views")),
|
||||
("Special", self.get_special_section("special")),
|
||||
('Culture - Movies', self.get_section('culture/movies')),
|
||||
('Culture - Manga & Anime', self.get_section('culture/manga_anime')),
|
||||
('Travel', self.get_section('travel')),
|
||||
('Sports', self.get_section('sports')),
|
||||
('Opinion - Editorial', self.get_section('opinion/editorial')),
|
||||
('Opinion - Vox Populi', self.get_section('opinion/vox')),
|
||||
('Opinion - Views', self.get_section('opinion/views')),
|
||||
('Special', self.get_special_section('special')),
|
||||
]
|
||||
|
||||
return feeds
|
||||
|
@ -14,7 +14,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AsianReviewOfBooks(BasicNewsRecipe):
|
||||
title = 'The Asian Review of Books'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.' # noqa
|
||||
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.' # noqa: E501
|
||||
publisher = 'The Asian Review of Books'
|
||||
category = 'literature, books, reviews, Asia'
|
||||
oldest_article = 30
|
||||
@ -26,11 +26,11 @@ class AsianReviewOfBooks(BasicNewsRecipe):
|
||||
publication_type = 'magazine'
|
||||
auto_cleanup = True
|
||||
masthead_url = 'https://i2.wp.com/asianreviewofbooks.com/content/wp-content/uploads/2016/09/ARBwidelogo.png'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
body{font-family: "Droid Serif", serif}
|
||||
.entry-title {font-family: "Playfair Display", serif}
|
||||
img {display: block}
|
||||
"""
|
||||
'''
|
||||
|
||||
recipe_specific_options = {
|
||||
'days': {
|
||||
|
@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AstroNEWS(BasicNewsRecipe):
|
||||
title = u'AstroNEWS'
|
||||
__author__ = 'fenuks'
|
||||
description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' # noqa
|
||||
description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' # noqa: E501
|
||||
category = 'astronomy, science'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
|
@ -12,7 +12,7 @@ test_article = None
|
||||
# test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed'
|
||||
|
||||
|
||||
# {{{ parse article JSON
|
||||
# parse article JSON {{{
|
||||
def process_image_block(lines, block):
|
||||
caption = block.get('captionText')
|
||||
caption_lines = []
|
||||
|
@ -12,7 +12,7 @@ test_article = None
|
||||
# test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed'
|
||||
|
||||
|
||||
# {{{ parse article JSON
|
||||
# parse article JSON {{{
|
||||
def process_image_block(lines, block):
|
||||
caption = block.get('captionText')
|
||||
caption_lines = []
|
||||
|
@ -11,7 +11,7 @@ class AttacEspanaRecipe (BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
|
||||
title = u'attac.es'
|
||||
description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' # noqa
|
||||
description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' # noqa: E501
|
||||
url = 'http://www.attac.es'
|
||||
language = 'es'
|
||||
tags = 'contrainformación, información alternativa'
|
||||
|
@ -24,4 +24,3 @@ class WwwAvisen_dk(BasicNewsRecipe):
|
||||
feeds = [
|
||||
('Nyheder fra Avisen.dk', 'http://www.avisen.dk/rss.aspx'),
|
||||
]
|
||||
|
||||
|
@ -24,12 +24,12 @@ class BuenosAiresHerald(BasicNewsRecipe):
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg'
|
||||
INDEX = 'http://www.buenosairesherald.com'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
h1{font-family: Georgia,serif}
|
||||
#fecha{text-align: right; font-size: small}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
|
@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class BaikalJournal(BasicNewsRecipe):
|
||||
title = '\u041B\u044E\u0434\u0438 \u0411\u0430\u0439\u043A\u0430\u043B\u0430'
|
||||
__author__ = 'bugmen00t'
|
||||
description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u044B\u0439 \u0430\u0432\u0442\u043E\u0440\u0441\u043A\u0438\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0436\u0443\u0440\u043D\u0430\u043B \u043E \u0436\u0438\u0437\u043D\u0438 \u0432 \u0421\u0438\u0431\u0438\u0440\u0438 \u0438 \u0432\u043E\u043A\u0440\u0443\u0433 \u0411\u0430\u0439\u043A\u0430\u043B\u0430, \u043E\u043F\u0438\u0441\u044B\u0432\u0430\u044E\u0449\u0438\u0439 \u0436\u0438\u0437\u043D\u044C \u0432 \u0441\u0438\u0431\u0438\u0440\u0441\u043A\u043E\u0439 \u0433\u043B\u0443\u0431\u0438\u043D\u043A\u0435.' # noqa
|
||||
publisher = '\u041E\u043B\u044C\u0433\u0430 \u041C\u0443\u0442\u043E\u0432\u0438\u043D\u0430, \u0415\u043B\u0435\u043D\u0430 \u0422\u0440\u0438\u0444\u043E\u043D\u043E\u0432\u0430' # noqa
|
||||
description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u044B\u0439 \u0430\u0432\u0442\u043E\u0440\u0441\u043A\u0438\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0436\u0443\u0440\u043D\u0430\u043B \u043E \u0436\u0438\u0437\u043D\u0438 \u0432 \u0421\u0438\u0431\u0438\u0440\u0438 \u0438 \u0432\u043E\u043A\u0440\u0443\u0433 \u0411\u0430\u0439\u043A\u0430\u043B\u0430, \u043E\u043F\u0438\u0441\u044B\u0432\u0430\u044E\u0449\u0438\u0439 \u0436\u0438\u0437\u043D\u044C \u0432 \u0441\u0438\u0431\u0438\u0440\u0441\u043A\u043E\u0439 \u0433\u043B\u0443\u0431\u0438\u043D\u043A\u0435.' # noqa: E501
|
||||
publisher = '\u041E\u043B\u044C\u0433\u0430 \u041C\u0443\u0442\u043E\u0432\u0438\u043D\u0430, \u0415\u043B\u0435\u043D\u0430 \u0422\u0440\u0438\u0444\u043E\u043D\u043E\u0432\u0430' # noqa: E501
|
||||
category = 'blog'
|
||||
cover_url = u'https://baikal-journal.ru/wp-content/themes/baikal/assets/img/logo-full.svg'
|
||||
language = 'ru'
|
||||
|
@ -16,7 +16,7 @@ class BangkokPostRecipe(BasicNewsRecipe):
|
||||
title = u'Bangkok Post'
|
||||
publisher = u'Post Publishing PCL'
|
||||
category = u'News'
|
||||
description = u'The world\'s window to Thailand'
|
||||
description = u"The world's window to Thailand"
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
|
@ -8,11 +8,11 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes
|
||||
|
||||
|
||||
class barrons(BasicNewsRecipe):
|
||||
title = 'Barron\'s Magazine'
|
||||
title = "Barron's Magazine"
|
||||
__author__ = 'unkn0wn'
|
||||
description = (
|
||||
'Barron\'s is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister '
|
||||
'publication to The Wall Street Journal, Barron\'s covers U.S. financial information, market developments, and '
|
||||
"Barron's is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister "
|
||||
"publication to The Wall Street Journal, Barron's covers U.S. financial information, market developments, and "
|
||||
'relevant statistics.'
|
||||
)
|
||||
language = 'en_US'
|
||||
@ -82,7 +82,7 @@ class barrons(BasicNewsRecipe):
|
||||
recipe_specific_options = {
|
||||
'date': {
|
||||
'short': 'The date of the edition to download (YYYYMMDD format)',
|
||||
'long': 'For example, 20240722.\nIf it didn\'t work, try again later.'
|
||||
'long': "For example, 20240722.\nIf it didn't work, try again later."
|
||||
}
|
||||
}
|
||||
|
||||
@ -127,7 +127,7 @@ class barrons(BasicNewsRecipe):
|
||||
desc += ' | ' + self.tag_to_string(summ)
|
||||
self.log('\t', title, ' ', url, '\n\t', desc)
|
||||
ans[section].append({'title': title, 'url': url, 'description': desc})
|
||||
return [(section, articles) for section, articles in ans.items()]
|
||||
return list(ans.items())
|
||||
|
||||
def print_version(self, url):
|
||||
return url.split('?')[0].replace('/articles/', '/amp/articles/')
|
||||
|
@ -135,9 +135,9 @@ class BBCNews(BasicNewsRecipe):
|
||||
|
||||
# Select / de-select the feeds you want in your ebook.
|
||||
feeds = [
|
||||
("News Home", "https://feeds.bbci.co.uk/news/rss.xml"),
|
||||
("UK", "https://feeds.bbci.co.uk/news/uk/rss.xml"),
|
||||
("World", "https://feeds.bbci.co.uk/news/world/rss.xml"),
|
||||
('News Home', 'https://feeds.bbci.co.uk/news/rss.xml'),
|
||||
('UK', 'https://feeds.bbci.co.uk/news/uk/rss.xml'),
|
||||
('World', 'https://feeds.bbci.co.uk/news/world/rss.xml'),
|
||||
# ("England", "https://feeds.bbci.co.uk/news/england/rss.xml"),
|
||||
# ("Scotland", "https://feeds.bbci.co.uk/news/scotland/rss.xml"),
|
||||
# ("Wales", "https://feeds.bbci.co.uk/news/wales/rss.xml"),
|
||||
@ -147,26 +147,26 @@ class BBCNews(BasicNewsRecipe):
|
||||
# ("Europe", "https://feeds.bbci.co.uk/news/world/europe/rss.xml"),
|
||||
# ("Latin America", "https://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
|
||||
# ("Middle East", "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
|
||||
("US & Canada", "https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"),
|
||||
("Politics", "https://feeds.bbci.co.uk/news/politics/rss.xml"),
|
||||
("Science/Environment",
|
||||
"https://feeds.bbci.co.uk/news/science_and_environment/rss.xml"),
|
||||
("Technology", "https://feeds.bbci.co.uk/news/technology/rss.xml"),
|
||||
("Magazine", "https://feeds.bbci.co.uk/news/magazine/rss.xml"),
|
||||
("Entertainment/Arts",
|
||||
"https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"),
|
||||
('US & Canada', 'https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml'),
|
||||
('Politics', 'https://feeds.bbci.co.uk/news/politics/rss.xml'),
|
||||
('Science/Environment',
|
||||
'https://feeds.bbci.co.uk/news/science_and_environment/rss.xml'),
|
||||
('Technology', 'https://feeds.bbci.co.uk/news/technology/rss.xml'),
|
||||
('Magazine', 'https://feeds.bbci.co.uk/news/magazine/rss.xml'),
|
||||
('Entertainment/Arts',
|
||||
'https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml'),
|
||||
# ("Health", "https://feeds.bbci.co.uk/news/health/rss.xml"),
|
||||
# ("Education/Family", "https://feeds.bbci.co.uk/news/education/rss.xml"),
|
||||
("Business", "https://feeds.bbci.co.uk/news/business/rss.xml"),
|
||||
("Special Reports", "https://feeds.bbci.co.uk/news/special_reports/rss.xml"),
|
||||
("Also in the News", "https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"),
|
||||
('Business', 'https://feeds.bbci.co.uk/news/business/rss.xml'),
|
||||
('Special Reports', 'https://feeds.bbci.co.uk/news/special_reports/rss.xml'),
|
||||
('Also in the News', 'https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml'),
|
||||
# ("Newsbeat", "https://www.bbc.co.uk/newsbeat/rss.xml"),
|
||||
# ("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
|
||||
# ("Blog: Mark D'Arcy (Parliamentary Correspondent)", "https://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
|
||||
# ("Blog: Robert Peston (Business Editor)", "https://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
|
||||
# ("Blog: Stephanie Flanders (Economics Editor)", "https://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
|
||||
("Sport Front Page",
|
||||
"http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"),
|
||||
('Sport Front Page',
|
||||
'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml'),
|
||||
# ("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
|
||||
# ("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
|
||||
# ("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),
|
||||
|
@ -201,24 +201,24 @@ class BBCBrasilRecipe(BasicNewsRecipe):
|
||||
conversion_options = {'smarten_punctuation': True}
|
||||
|
||||
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
||||
.introduction, .first { font-weight: bold; } \
|
||||
.cross-head { font-weight: bold; font-size: 125%; } \
|
||||
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
|
||||
.cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \
|
||||
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
|
||||
.correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block; \
|
||||
text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \
|
||||
.story-date, .published, .datestamp { font-size: 80%; } \
|
||||
table { width: 100%; } \
|
||||
td img { display: block; margin: 5px auto; } \
|
||||
ul { padding-top: 10px; } \
|
||||
ol { padding-top: 10px; } \
|
||||
li { padding-top: 5px; padding-bottom: 5px; } \
|
||||
h1 { text-align: center; font-size: 175%; font-weight: bold; } \
|
||||
h2 { text-align: center; font-size: 150%; font-weight: bold; } \
|
||||
h3 { text-align: center; font-size: 125%; font-weight: bold; } \
|
||||
h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
|
||||
extra_css = '''body { font-family: verdana, helvetica, sans-serif; }
|
||||
.introduction, .first { font-weight: bold; }
|
||||
.cross-head { font-weight: bold; font-size: 125%; }
|
||||
.cap, .caption { display: block; font-size: 80%; font-style: italic; }
|
||||
.cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; }
|
||||
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position,
|
||||
.correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block;
|
||||
text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; }
|
||||
.story-date, .published, .datestamp { font-size: 80%; }
|
||||
table { width: 100%; }
|
||||
td img { display: block; margin: 5px auto; }
|
||||
ul { padding-top: 10px; }
|
||||
ol { padding-top: 10px; }
|
||||
li { padding-top: 5px; padding-bottom: 5px; }
|
||||
h1 { text-align: center; font-size: 175%; font-weight: bold; }
|
||||
h2 { text-align: center; font-size: 150%; font-weight: bold; }
|
||||
h3 { text-align: center; font-size: 125%; font-weight: bold; }
|
||||
h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'''
|
||||
|
||||
# Remove various tag attributes to improve the look of the ebook pages.
|
||||
remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
||||
@ -556,19 +556,19 @@ class BBCBrasilRecipe(BasicNewsRecipe):
|
||||
def print_version(self, url):
|
||||
|
||||
# Handle sports page urls type 01:
|
||||
if (url.find("go/rss/-/sport1/") != -1):
|
||||
temp_url = url.replace("go/rss/-/", "")
|
||||
if (url.find('go/rss/-/sport1/') != -1):
|
||||
temp_url = url.replace('go/rss/-/', '')
|
||||
|
||||
# Handle sports page urls type 02:
|
||||
elif (url.find("go/rss/int/news/-/sport1/") != -1):
|
||||
temp_url = url.replace("go/rss/int/news/-/", "")
|
||||
elif (url.find('go/rss/int/news/-/sport1/') != -1):
|
||||
temp_url = url.replace('go/rss/int/news/-/', '')
|
||||
|
||||
# Handle regular news page urls:
|
||||
else:
|
||||
temp_url = url.replace("go/rss/int/news/-/", "")
|
||||
temp_url = url.replace('go/rss/int/news/-/', '')
|
||||
|
||||
# Always add "?print=true" to the end of the url.
|
||||
print_url = temp_url + "?print=true"
|
||||
print_url = temp_url + '?print=true'
|
||||
|
||||
return print_url
|
||||
|
||||
|
@ -36,7 +36,7 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
|
||||
remove_tags_after = dict(name='div', attrs={'class': [' g-w8']})
|
||||
|
||||
remove_tags = [
|
||||
dict(name='ul', attrs={'class': ['document-tools blq-clearfix', 'blq-clearfix']}), dict(name='div', attrs={'class': ['box bx-quote-bubble', 'socialmedia-links', 'list li-carousel', 'list li-plain rolling-news', 'list li-plain', 'box bx-livestats', 'li-tab content', 'list li-relatedlinks', 'list li-relatedinternetlinks']}) # noqa
|
||||
dict(name='ul', attrs={'class': ['document-tools blq-clearfix', 'blq-clearfix']}), dict(name='div', attrs={'class': ['box bx-quote-bubble', 'socialmedia-links', 'list li-carousel', 'list li-plain rolling-news', 'list li-plain', 'box bx-livestats', 'li-tab content', 'list li-relatedlinks', 'list li-relatedinternetlinks']}) # noqa: E501
|
||||
]
|
||||
|
||||
feeds = [
|
||||
|
@ -134,7 +134,7 @@ if __name__ == '__main__':
|
||||
class BBC(BasicNewsRecipe):
|
||||
title = 'BBC News (fast)'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.' # noqa
|
||||
description = 'Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.' # noqa: E501
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
|
@ -21,7 +21,7 @@ class BBC(BasicNewsRecipe):
|
||||
category = 'sport, news, UK, world'
|
||||
language = 'en_GB'
|
||||
publication_type = 'newsportal'
|
||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa
|
||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa: E501
|
||||
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
||||
conversion_options = {
|
||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
|
||||
|
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class BeforeWeGo(BasicNewsRecipe):
|
||||
title = 'Before We Go'
|
||||
__author__ = 'bugmen00t'
|
||||
description = 'Before We Go Blog is a collective of Fantasy, Sci-Fi and Graphic Novel fans from around the world, passionate about providing awesome, enjoyable reviews for anyone who loves a good book!' # noqa
|
||||
description = 'Before We Go Blog is a collective of Fantasy, Sci-Fi and Graphic Novel fans from around the world, passionate about providing awesome, enjoyable reviews for anyone who loves a good book!' # noqa: E501
|
||||
publisher = 'BEFOREWEGOBLOG'
|
||||
category = 'blog'
|
||||
# cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/09/beth-with-long-hair.jpeg'
|
||||
|
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Bellingcat(BasicNewsRecipe):
|
||||
title = 'Bellingcat'
|
||||
__author__ = 'bugmen00t'
|
||||
description = 'Bellingcat is an independent international collective of researchers, investigators and citizen journalists using open source and social media investigation to probe a variety of subjects – from Mexican drug lords and crimes against humanity, to tracking the use of chemical weapons and conflicts worldwide. With staff and contributors in more than 20 countries around the world, we operate in a unique field where advanced technology, forensic research, journalism, investigations, transparency and accountability come together.' # noqa
|
||||
description = 'Bellingcat is an independent international collective of researchers, investigators and citizen journalists using open source and social media investigation to probe a variety of subjects – from Mexican drug lords and crimes against humanity, to tracking the use of chemical weapons and conflicts worldwide. With staff and contributors in more than 20 countries around the world, we operate in a unique field where advanced technology, forensic research, journalism, investigations, transparency and accountability come together.' # noqa: E501
|
||||
publisher = 'Stichting Bellingcat'
|
||||
category = 'blog'
|
||||
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
||||
|
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Bellingcat(BasicNewsRecipe):
|
||||
title = 'Bellingcat'
|
||||
__author__ = 'bugmen00t'
|
||||
description = 'Bellingcat es un colectivo internacional independiente de investigadores y periodistas ciudadanos que usan informaci\u00F3n de fuente abierta y redes sociales para investigar una gran variedad de temas, desde carteles de droga en M\u00E9xico y cr\u00EDmenes de lesa humanidad hasta el rastreo de armas qu\u00EDmicas en zonas de conflicto alrededor del mundo. Nosotros operamos en un campo \u00FAnico donde la tecnolog\u00EDa avanzada, las investigaciones forenses, el periodismo, y la transparencia y responsabilidad se unen.' # noqa
|
||||
description = 'Bellingcat es un colectivo internacional independiente de investigadores y periodistas ciudadanos que usan informaci\u00F3n de fuente abierta y redes sociales para investigar una gran variedad de temas, desde carteles de droga en M\u00E9xico y cr\u00EDmenes de lesa humanidad hasta el rastreo de armas qu\u00EDmicas en zonas de conflicto alrededor del mundo. Nosotros operamos en un campo \u00FAnico donde la tecnolog\u00EDa avanzada, las investigaciones forenses, el periodismo, y la transparencia y responsabilidad se unen.' # noqa: E501
|
||||
publisher = 'Stichting Bellingcat'
|
||||
category = 'blog'
|
||||
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
||||
|
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Bellingcat(BasicNewsRecipe):
|
||||
title = 'Bellingcat'
|
||||
__author__ = 'bugmen00t'
|
||||
description = 'Bellingcat est un groupe international ind\u00E9pendant de chercheurs, d\u0027enqu\u00EAteurs et de journalistes citoyens utilisant \u00E0 la fois: enqu\u00EAtes \u0027open source\u0027 et r\u00E9seaux sociaux, pour sonder une vari\u00E9t\u00E9 de sujets - trafiquants de drogue mexicains, crimes contre l\u0027humanit\u00E9, suivi de l\u0027utilisation d\u0027armes chimiques et conflits dans le monde entier. Nous op\u00E9rons dans un domaine unique dans lequel technologie de pointe, recherche m\u00E9dico-l\u00E9gale, journalisme, enqu\u00EAtes, transparence et responsabilit\u00E9 se rencontrent.' # noqa
|
||||
description = 'Bellingcat est un groupe international ind\u00E9pendant de chercheurs, d\u0027enqu\u00EAteurs et de journalistes citoyens utilisant \u00E0 la fois: enqu\u00EAtes \u0027open source\u0027 et r\u00E9seaux sociaux, pour sonder une vari\u00E9t\u00E9 de sujets - trafiquants de drogue mexicains, crimes contre l\u0027humanit\u00E9, suivi de l\u0027utilisation d\u0027armes chimiques et conflits dans le monde entier. Nous op\u00E9rons dans un domaine unique dans lequel technologie de pointe, recherche m\u00E9dico-l\u00E9gale, journalisme, enqu\u00EAtes, transparence et responsabilit\u00E9 se rencontrent.' # noqa: E501
|
||||
publisher = 'Stichting Bellingcat'
|
||||
category = 'blog'
|
||||
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
||||
|
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Bellingcat(BasicNewsRecipe):
|
||||
title = '\u0411\u0435\u043B\u043B\u0438\u043D\u0433\u043A\u044D\u0442 (bell\u00BFngcat)'
|
||||
__author__ = 'bugmen00t'
|
||||
description = 'Bellingcat \u2014 \u044D\u0442\u043E \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u0430\u044F \u043C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0430\u044F \u043A\u043E\u043C\u0430\u043D\u0434\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u0435\u0439 \u0438 \u0433\u0440\u0430\u0436\u0434\u0430\u043D\u0441\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u044E\u0442 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u0435 \u0438\u0441\u0442\u043E\u0447\u043D\u0438\u043A\u0438 \u0438 \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0441\u0435\u0442\u0438 \u0434\u043B\u044F \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0439 \u043D\u0430 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u2014 \u043E\u0442 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u043A\u0430\u0440\u0442\u0435\u043B\u0435\u0439 \u0438 \u043F\u0440\u0435\u0441\u0442\u0443\u043F\u043B\u0435\u043D\u0438\u0439 \u043F\u0440\u043E\u0442\u0438\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0430 \u0434\u043E \u043E\u0442\u0441\u043B\u0435\u0436\u0438\u0432\u0430\u043D\u0438\u044F \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u044F \u0445\u0438\u043C\u0438\u0447\u0435\u0441\u043A\u043E\u0433\u043E \u043E\u0440\u0443\u0436\u0438\u044F \u0438 \u043A\u043E\u043D\u0444\u043B\u0438\u043A\u0442\u043E\u0432 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u2014 \u0441\u043E\u0447\u0435\u0442\u0430\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0435 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438, \u043A\u0440\u0438\u043C\u0438\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u0438\u0437\u044B, \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0443, \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F \u0438 \u0431\u043E\u0440\u044C\u0431\u0443 \u0437\u0430 \u043F\u0440\u043E\u0437\u0440\u0430\u0447\u043D\u043E\u0441\u0442\u044C \u0438 \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0441\u0442\u044C.' # noqa
|
||||
description = 'Bellingcat \u2014 \u044D\u0442\u043E \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u0430\u044F \u043C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0430\u044F \u043A\u043E\u043C\u0430\u043D\u0434\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u0435\u0439 \u0438 \u0433\u0440\u0430\u0436\u0434\u0430\u043D\u0441\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u044E\u0442 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u0435 \u0438\u0441\u0442\u043E\u0447\u043D\u0438\u043A\u0438 \u0438 \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0441\u0435\u0442\u0438 \u0434\u043B\u044F \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0439 \u043D\u0430 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u2014 \u043E\u0442 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u043A\u0430\u0440\u0442\u0435\u043B\u0435\u0439 \u0438 \u043F\u0440\u0435\u0441\u0442\u0443\u043F\u043B\u0435\u043D\u0438\u0439 \u043F\u0440\u043E\u0442\u0438\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0430 \u0434\u043E \u043E\u0442\u0441\u043B\u0435\u0436\u0438\u0432\u0430\u043D\u0438\u044F \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u044F \u0445\u0438\u043C\u0438\u0447\u0435\u0441\u043A\u043E\u0433\u043E \u043E\u0440\u0443\u0436\u0438\u044F \u0438 \u043A\u043E\u043D\u0444\u043B\u0438\u043A\u0442\u043E\u0432 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u2014 \u0441\u043E\u0447\u0435\u0442\u0430\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0435 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438, \u043A\u0440\u0438\u043C\u0438\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u0438\u0437\u044B, \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0443, \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F \u0438 \u0431\u043E\u0440\u044C\u0431\u0443 \u0437\u0430 \u043F\u0440\u043E\u0437\u0440\u0430\u0447\u043D\u043E\u0441\u0442\u044C \u0438 \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0441\u0442\u044C.' # noqa: E501
|
||||
publisher = 'Stichting Bellingcat'
|
||||
category = 'blog'
|
||||
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
||||
|
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Bellingcat(BasicNewsRecipe):
|
||||
title = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 (bell\u00BFngcat)'
|
||||
__author__ = 'bugmen00t'
|
||||
description = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 \u2014 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0438\u0439 \u043C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0438\u0439 \u043A\u043E\u043B\u0435\u043A\u0442\u0438\u0432 \u0434\u043E\u0441\u043B\u0456\u0434\u043D\u0438\u043A\u0456\u0432, \u0441\u043B\u0456\u0434\u0447\u0438\u0445 \u0456 \u0433\u0440\u043E\u043C\u0430\u0434\u044F\u043D\u0441\u044C\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0456\u0432, \u044F\u043A\u0456 \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u043E\u0432\u0443\u044E\u0442\u044C \u0432\u0456\u0434\u043A\u0440\u0438\u0442\u0456 \u0434\u0436\u0435\u0440\u0435\u043B\u0430 \u0442\u0430 \u0441\u043E\u0446\u0456\u0430\u043B\u044C\u043D\u0456 \u043C\u0435\u0440\u0435\u0436\u0456 \u0434\u043B\u044F \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F \u0440\u0456\u0437\u043D\u043E\u043C\u0430\u043D\u0456\u0442\u043D\u0438\u0445 \u0442\u0435\u043C \u2014 \u0432\u0456\u0434 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u044C\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u0431\u0430\u0440\u043E\u043D\u0456\u0432 \u0456 \u0437\u043B\u043E\u0447\u0438\u043D\u0456\u0432 \u043F\u0440\u043E\u0442\u0438 \u043B\u044E\u0434\u0441\u0442\u0432\u0430, \u0434\u043E \u0432\u0456\u0434\u0441\u0442\u0435\u0436\u0435\u043D\u043D\u044F \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u0430\u043D\u043D\u044F \u0445\u0456\u043C\u0456\u0447\u043D\u043E\u0457 \u0437\u0431\u0440\u043E\u0457 \u0442\u0430 \u043A\u043E\u043D\u0444\u043B\u0456\u043A\u0442\u0456\u0432 \u0443 \u0432\u0441\u044C\u043E\u043C\u0443 \u0441\u0432\u0456\u0442\u0456. \u041C\u0438 \u043F\u0440\u0430\u0446\u044E\u0454\u043C\u043E \u0432 \u0443\u043D\u0456\u043A\u0430\u043B\u044C\u043D\u0456\u0439 \u0441\u0444\u0435\u0440\u0456, \u0434\u0435 \u043F\u0435\u0440\u0435\u0434\u043E\u0432\u0456 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0441\u0443\u0434\u043E\u0432\u043E-\u043C\u0435\u0434\u0438\u0447\u043D\u0456 \u0434\u043E\u0441\u043B\u0456\u0434\u0436\u0435\u043D\u043D\u044F, \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0438\u043A\u0430, \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F, \u043F\u0440\u043E\u0437\u043E\u0440\u0456\u0441\u0442\u044C \u0456 \u0432\u0456\u0434\u043F\u043E\u0432\u0456\u0434\u0430\u043B\u044C\u043D\u0456\u0441\u0442\u044C \u043E\u0431\u2019\u0454\u0434\u043D\u0443\u044E\u0442\u044C\u0441\u044F.' # noqa
|
||||
description = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 \u2014 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0438\u0439 \u043C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0438\u0439 \u043A\u043E\u043B\u0435\u043A\u0442\u0438\u0432 \u0434\u043E\u0441\u043B\u0456\u0434\u043D\u0438\u043A\u0456\u0432, \u0441\u043B\u0456\u0434\u0447\u0438\u0445 \u0456 \u0433\u0440\u043E\u043C\u0430\u0434\u044F\u043D\u0441\u044C\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0456\u0432, \u044F\u043A\u0456 \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u043E\u0432\u0443\u044E\u0442\u044C \u0432\u0456\u0434\u043A\u0440\u0438\u0442\u0456 \u0434\u0436\u0435\u0440\u0435\u043B\u0430 \u0442\u0430 \u0441\u043E\u0446\u0456\u0430\u043B\u044C\u043D\u0456 \u043C\u0435\u0440\u0435\u0436\u0456 \u0434\u043B\u044F \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F \u0440\u0456\u0437\u043D\u043E\u043C\u0430\u043D\u0456\u0442\u043D\u0438\u0445 \u0442\u0435\u043C \u2014 \u0432\u0456\u0434 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u044C\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u0431\u0430\u0440\u043E\u043D\u0456\u0432 \u0456 \u0437\u043B\u043E\u0447\u0438\u043D\u0456\u0432 \u043F\u0440\u043E\u0442\u0438 \u043B\u044E\u0434\u0441\u0442\u0432\u0430, \u0434\u043E \u0432\u0456\u0434\u0441\u0442\u0435\u0436\u0435\u043D\u043D\u044F \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u0430\u043D\u043D\u044F \u0445\u0456\u043C\u0456\u0447\u043D\u043E\u0457 \u0437\u0431\u0440\u043E\u0457 \u0442\u0430 \u043A\u043E\u043D\u0444\u043B\u0456\u043A\u0442\u0456\u0432 \u0443 \u0432\u0441\u044C\u043E\u043C\u0443 \u0441\u0432\u0456\u0442\u0456. \u041C\u0438 \u043F\u0440\u0430\u0446\u044E\u0454\u043C\u043E \u0432 \u0443\u043D\u0456\u043A\u0430\u043B\u044C\u043D\u0456\u0439 \u0441\u0444\u0435\u0440\u0456, \u0434\u0435 \u043F\u0435\u0440\u0435\u0434\u043E\u0432\u0456 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0441\u0443\u0434\u043E\u0432\u043E-\u043C\u0435\u0434\u0438\u0447\u043D\u0456 \u0434\u043E\u0441\u043B\u0456\u0434\u0436\u0435\u043D\u043D\u044F, \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0438\u043A\u0430, \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F, \u043F\u0440\u043E\u0437\u043E\u0440\u0456\u0441\u0442\u044C \u0456 \u0432\u0456\u0434\u043F\u043E\u0432\u0456\u0434\u0430\u043B\u044C\u043D\u0456\u0441\u0442\u044C \u043E\u0431\u2019\u0454\u0434\u043D\u0443\u044E\u0442\u044C\u0441\u044F.' # noqa: E501
|
||||
publisher = 'Stichting Bellingcat'
|
||||
category = 'blog'
|
||||
cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
|
||||
|
@ -17,13 +17,13 @@ class BenchmarkPl(BasicNewsRecipe):
|
||||
extra_css = 'ul {list-style-type: none;}'
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
preprocess_regexps = [(re.compile(u'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', # noqa
|
||||
re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(u'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa
|
||||
preprocess_regexps = [(re.compile(u'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', # noqa: E501, RUF039
|
||||
re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(u'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa: RUF039
|
||||
|
||||
keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(
|
||||
name='div', attrs={'class': ['m_zwykly', 'gallery']}), dict(id='article')]
|
||||
remove_tags_after = dict(id='article')
|
||||
remove_tags = [dict(name='div', attrs={'class': ['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs={ 'background': 'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width': '210', 'cellspacing': '1', 'cellpadding': '4', 'border': '0', 'align': 'right'})] # noqa
|
||||
remove_tags = [dict(name='div', attrs={'class': ['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs={'background': 'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width': '210', 'cellspacing': '1', 'cellpadding': '4', 'border': '0', 'align': 'right'})] # noqa: E501
|
||||
|
||||
INDEX = 'http://www.benchmark.pl'
|
||||
feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
|
||||
|
@ -25,7 +25,6 @@ class bergfreunde_blog(BasicNewsRecipe):
|
||||
__author__ = 'VoHe'
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
|
||||
|
@ -63,12 +63,12 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
dict(
|
||||
attrs={'class': ['socialbar', 'social-sharing flank', 'vel', 'back']}),
|
||||
dict(name='img', attrs={'alt': 'logo'}),
|
||||
dict(name='div', attrs={'class': re.compile('infoEl')}),
|
||||
dict(name='span', attrs={'class': re.compile('loupe')})
|
||||
dict(name='div', attrs={'class': re.compile(r'infoEl')}),
|
||||
dict(name='span', attrs={'class': re.compile(r'loupe')})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'itemprop': re.compile('articleBody')})
|
||||
dict(name='div', attrs={'itemprop': re.compile(r'articleBody')})
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -30,7 +30,7 @@ class BillOReilly(BasicNewsRecipe):
|
||||
feeds.append(("O'Reilly Factor", articles_shows))
|
||||
|
||||
if articles_columns:
|
||||
feeds.append(("Newspaper Column", articles_columns))
|
||||
feeds.append(('Newspaper Column', articles_columns))
|
||||
|
||||
return feeds
|
||||
|
||||
@ -49,8 +49,7 @@ class BillOReilly(BasicNewsRecipe):
|
||||
continue
|
||||
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.billoreilly.com' + url + \
|
||||
'&dest=/pg/jsp/community/tvshowprint.jsp'
|
||||
url = 'http://www.billoreilly.com' + url + '&dest=/pg/jsp/community/tvshowprint.jsp'
|
||||
|
||||
self.log('\t\tFound article:', title)
|
||||
self.log('\t\t\t', url)
|
||||
|
@ -57,8 +57,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.birminghammail.co.uk')
|
||||
cov = soup.find(attrs={'src': re.compile(
|
||||
'http://images.icnetwork.co.uk/upl/birm')})
|
||||
cov = soup.find(attrs={'src': re.compile(r'http://images.icnetwork.co.uk/upl/birm')})
|
||||
cov = str(cov)
|
||||
cov2 = re.findall(
|
||||
r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
|
||||
|
@ -27,8 +27,8 @@ class bleskRecipe(BasicNewsRecipe):
|
||||
cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
extra_css = """
|
||||
"""
|
||||
extra_css = '''
|
||||
'''
|
||||
|
||||
remove_attributes = []
|
||||
remove_tags_before = dict(name='div', attrs={'id': ['boxContent']})
|
||||
|
@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Blic(BasicNewsRecipe):
|
||||
title = 'Blic'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' # noqa
|
||||
description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' # noqa: E501
|
||||
publisher = 'RINGIER d.o.o.'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 2
|
||||
@ -23,7 +23,7 @@ class Blic(BasicNewsRecipe):
|
||||
masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png'
|
||||
language = 'sr'
|
||||
publication_type = 'newspaper'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
|
||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||
body{font-family: Georgia, serif1, serif}
|
||||
@ -35,13 +35,13 @@ class Blic(BasicNewsRecipe):
|
||||
.potpis{font-size: x-small; color: gray}
|
||||
.article_info{font-size: small}
|
||||
img{margin-bottom: 0.8em; margin-top: 0.8em; display: block}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] # noqa: RUF039
|
||||
remove_tags_before = dict(name='div', attrs={'id': 'article_info'})
|
||||
remove_tags = [
|
||||
dict(name=['object', 'link', 'meta', 'base', 'object', 'embed'])]
|
||||
|
@ -56,7 +56,7 @@ class Bloomberg(BasicNewsRecipe):
|
||||
masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg'
|
||||
description = (
|
||||
'Bloomberg Businessweek helps global leaders stay ahead with insights and in-depth analysis on the people,'
|
||||
' companies, events, and trends shaping today\'s complex, global economy.'
|
||||
" companies, events, and trends shaping today's complex, global economy."
|
||||
)
|
||||
remove_empty_feeds = True
|
||||
|
||||
@ -124,8 +124,8 @@ class Bloomberg(BasicNewsRecipe):
|
||||
cat = '<div class="cat">' + data['primaryCategory'] + '</div>'
|
||||
|
||||
if 'abstract' in data and data['abstract'] and data['abstract'] is not None:
|
||||
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join([x for x in data['abstract']]) + '</li></ul></div>'
|
||||
elif 'summary' in data and data['summary']:
|
||||
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join(list(data['abstract'])) + '</li></ul></div>'
|
||||
elif data.get('summary'):
|
||||
subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>'
|
||||
|
||||
if 'byline' in data and data['byline'] is not None:
|
||||
|
@ -134,8 +134,8 @@ class Bloomberg(BasicNewsRecipe):
|
||||
cat = '<div class="cat">' + data['primaryCategory'] + '</div>'
|
||||
|
||||
if 'abstract' in data and data['abstract'] and data['abstract'] is not None:
|
||||
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join([x for x in data['abstract']]) + '</li></ul></div>'
|
||||
elif 'summary' in data and data['summary']:
|
||||
subhead = '<div class="subhead"><ul><li>' + '</li><li>'.join(list(data['abstract'])) + '</li></ul></div>'
|
||||
elif data.get('summary'):
|
||||
subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>'
|
||||
|
||||
if 'byline' in data and data['byline'] is not None:
|
||||
|
@ -2,29 +2,29 @@ from urllib.parse import urljoin
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
_issue_url = ""
|
||||
_issue_url = ''
|
||||
|
||||
|
||||
class BookforumMagazine(BasicNewsRecipe):
|
||||
title = "Bookforum"
|
||||
title = 'Bookforum'
|
||||
description = (
|
||||
"Bookforum is an American book review magazine devoted to books and "
|
||||
"the discussion of literature. https://www.bookforum.com/print"
|
||||
'Bookforum is an American book review magazine devoted to books and '
|
||||
'the discussion of literature. https://www.bookforum.com/print'
|
||||
)
|
||||
language = "en"
|
||||
__author__ = "ping"
|
||||
publication_type = "magazine"
|
||||
encoding = "utf-8"
|
||||
language = 'en'
|
||||
__author__ = 'ping'
|
||||
publication_type = 'magazine'
|
||||
encoding = 'utf-8'
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
auto_cleanup = False
|
||||
compress_news_images = True
|
||||
compress_news_images_auto_size = 8
|
||||
|
||||
keep_only_tags = [dict(class_="blog-article")]
|
||||
remove_tags = [dict(name=["af-share-toggle", "af-related-articles"])]
|
||||
keep_only_tags = [dict(class_='blog-article')]
|
||||
remove_tags = [dict(name=['af-share-toggle', 'af-related-articles'])]
|
||||
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
.blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; }
|
||||
.blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
|
||||
.blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; }
|
||||
@ -33,46 +33,46 @@ class BookforumMagazine(BasicNewsRecipe):
|
||||
display: block; max-width: 100%; height: auto;
|
||||
}
|
||||
.blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; }
|
||||
"""
|
||||
'''
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
# strip away links that's not needed
|
||||
for ele in soup.select(".blog-article__header a"):
|
||||
for ele in soup.select('.blog-article__header a'):
|
||||
ele.unwrap()
|
||||
return soup
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(
|
||||
_issue_url if _issue_url else "https://www.bookforum.com/print"
|
||||
_issue_url if _issue_url else 'https://www.bookforum.com/print'
|
||||
)
|
||||
meta_ele = soup.find("meta", property="og:title")
|
||||
meta_ele = soup.find('meta', property='og:title')
|
||||
if meta_ele:
|
||||
self.timefmt = f' [{meta_ele["content"]}]'
|
||||
|
||||
cover_ele = soup.find("img", class_="toc-issue__cover")
|
||||
cover_ele = soup.find('img', class_='toc-issue__cover')
|
||||
if cover_ele:
|
||||
self.cover_url = urljoin(
|
||||
"https://www.bookforum.com",
|
||||
soup.find("img", class_="toc-issue__cover")["src"],
|
||||
'https://www.bookforum.com',
|
||||
soup.find('img', class_='toc-issue__cover')['src'],
|
||||
)
|
||||
|
||||
articles = {}
|
||||
for sect_ele in soup.find_all("div", class_="toc-articles__section"):
|
||||
for sect_ele in soup.find_all('div', class_='toc-articles__section'):
|
||||
section_name = self.tag_to_string(
|
||||
sect_ele.find("a", class_="toc__anchor-links__link")
|
||||
sect_ele.find('a', class_='toc__anchor-links__link')
|
||||
)
|
||||
for article_ele in sect_ele.find_all("article"):
|
||||
title_ele = article_ele.find("h1")
|
||||
sub_title_ele = article_ele.find(class_="toc-article__subtitle")
|
||||
for article_ele in sect_ele.find_all('article'):
|
||||
title_ele = article_ele.find('h1')
|
||||
sub_title_ele = article_ele.find(class_='toc-article__subtitle')
|
||||
articles.setdefault(section_name, []).append(
|
||||
{
|
||||
"title": self.tag_to_string(title_ele),
|
||||
"url": article_ele.find("a", class_="toc-article__link")[
|
||||
"href"
|
||||
'title': self.tag_to_string(title_ele),
|
||||
'url': article_ele.find('a', class_='toc-article__link')[
|
||||
'href'
|
||||
],
|
||||
"description": self.tag_to_string(sub_title_ele)
|
||||
'description': self.tag_to_string(sub_title_ele)
|
||||
if sub_title_ele
|
||||
else "",
|
||||
else '',
|
||||
}
|
||||
)
|
||||
return articles.items()
|
||||
|
@ -22,9 +22,9 @@ class Borsen_dk(BasicNewsRecipe):
|
||||
language = 'da'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name="h1", attrs={'itemprop': 'headline'}),
|
||||
dict(name="div", attrs={'itemprob': 'datePublished'}),
|
||||
dict(name="div", attrs={'itemprop': 'articleBody'}),
|
||||
dict(name='h1', attrs={'itemprop': 'headline'}),
|
||||
dict(name='div', attrs={'itemprob': 'datePublished'}),
|
||||
dict(name='div', attrs={'itemprop': 'articleBody'}),
|
||||
]
|
||||
|
||||
# Feed are found here:
|
||||
|
@ -42,24 +42,24 @@ def class_startswith(*prefixes):
|
||||
|
||||
# From: https://www3.bostonglobe.com/lifestyle/comics?arc404=true
|
||||
comics_to_fetch = {
|
||||
"ADAM@HOME": 'ad',
|
||||
"ARLO & JANIS": 'aj',
|
||||
'ADAM@HOME': 'ad',
|
||||
'ARLO & JANIS': 'aj',
|
||||
# "CUL DE SAC": 'cds',
|
||||
# "CURTIS": 'kfcrt',
|
||||
"DILBERT": 'dt',
|
||||
"DOONESBURY": 'db',
|
||||
"DUSTIN": 'kfdus',
|
||||
"F MINUS": 'fm',
|
||||
"FOR BETTER OR WORSE": 'fb',
|
||||
'DILBERT': 'dt',
|
||||
'DOONESBURY': 'db',
|
||||
'DUSTIN': 'kfdus',
|
||||
'F MINUS': 'fm',
|
||||
'FOR BETTER OR WORSE': 'fb',
|
||||
# "GET FUZZY": 'gz',
|
||||
# "MOTHER GOOSE & GRIMM": 'tmmgg',
|
||||
# "JUMPSTART": 'jt',
|
||||
"MONTY": 'mt',
|
||||
'MONTY': 'mt',
|
||||
# "POOCH CAFE",
|
||||
"RHYMES WITH ORANGE": 'kfrwo',
|
||||
'RHYMES WITH ORANGE': 'kfrwo',
|
||||
# "ROSE IS ROSE": 'rr',
|
||||
# "ZIPPY THE PINHEAD": 'kfzpy',
|
||||
"ZITS": 'kfzt'
|
||||
'ZITS': 'kfzt'
|
||||
}
|
||||
|
||||
|
||||
@ -77,10 +77,10 @@ def extract_json(raw_html):
|
||||
|
||||
|
||||
def absolutize_url(url):
|
||||
if url.startswith("//"):
|
||||
return "https:" + url
|
||||
if url.startswith('//'):
|
||||
return 'https:' + url
|
||||
if url.startswith('/'):
|
||||
url = "https://www.bostonglobe.com" + url
|
||||
url = 'https://www.bostonglobe.com' + url
|
||||
return url
|
||||
|
||||
|
||||
@ -120,7 +120,7 @@ def main():
|
||||
|
||||
class BostonGlobeSubscription(BasicNewsRecipe):
|
||||
|
||||
title = "Boston Globe"
|
||||
title = 'Boston Globe'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'The Boston Globe'
|
||||
language = 'en_US'
|
||||
|
@ -12,6 +12,7 @@ def class_as_string(x):
|
||||
x = ' '.join(x)
|
||||
return x
|
||||
|
||||
|
||||
def class_startswith(*prefixes):
|
||||
|
||||
def q(x):
|
||||
@ -24,18 +25,19 @@ def class_startswith(*prefixes):
|
||||
|
||||
return dict(attrs={'class': q})
|
||||
|
||||
|
||||
def absolutize_url(url):
|
||||
if url.startswith("//"):
|
||||
return "https:" + url
|
||||
if url.startswith('//'):
|
||||
return 'https:' + url
|
||||
if url.startswith('/'):
|
||||
url = "https://www.bostonglobe.com" + url
|
||||
url = 'https://www.bostonglobe.com' + url
|
||||
return url
|
||||
|
||||
|
||||
class BostonGlobePrint(BasicNewsRecipe):
|
||||
title = "Boston Globe | Print Edition"
|
||||
title = 'Boston Globe | Print Edition'
|
||||
__author__ = 'Kovid Goyal, unkn0wn'
|
||||
description = 'The Boston Globe - Today\'s Paper'
|
||||
description = "The Boston Globe - Today's Paper"
|
||||
language = 'en_US'
|
||||
|
||||
keep_only_tags = [
|
||||
@ -70,7 +72,7 @@ class BostonGlobePrint(BasicNewsRecipe):
|
||||
for image in soup.findAll('img', src=True):
|
||||
if image['src'].endswith('750.jpg'):
|
||||
return 'https:' + image['src']
|
||||
self.log("\nCover unavailable")
|
||||
self.log('\nCover unavailable')
|
||||
cover = None
|
||||
return cover
|
||||
|
||||
@ -94,8 +96,8 @@ class BostonGlobePrint(BasicNewsRecipe):
|
||||
desc = self.tag_to_string(d)
|
||||
|
||||
self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url)
|
||||
feeds_dict[section].append({"title": title, "url": url, "description": desc})
|
||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
||||
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
|
||||
return list(feeds_dict.items())
|
||||
|
||||
def preprocess_raw_html(self, raw_html, url):
|
||||
soup = self.index_to_soup(raw_html)
|
||||
|
@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1467715002(BasicNewsRecipe):
|
||||
title = 'Breaking Mad'
|
||||
__author__ = 'bugmen00t'
|
||||
description = 'Breaking Mad - пока смерть не насмешит нас. Breakingmad - это агрегатор самых странных и драматичных новостей с жизнеутверждающим эффектом. Breakingmad - это все, что вы должны знать о России и не только.' # noqa
|
||||
description = 'Breaking Mad - пока смерть не насмешит нас. Breakingmad - это агрегатор самых странных и драматичных новостей с жизнеутверждающим эффектом. Breakingmad - это все, что вы должны знать о России и не только.' # noqa: E501
|
||||
publisher = 'BreakingMad'
|
||||
category = 'news'
|
||||
cover_url = u'http://breakingmad.me/images/logo.png'
|
||||
|
@ -5,7 +5,6 @@ from __future__ import print_function
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -23,40 +22,39 @@ class brewiarz(BasicNewsRecipe):
|
||||
next_days = 1
|
||||
|
||||
def parse_index(self):
|
||||
dec2rom_dict = {"01": "i", "02": "ii", "03": "iii", "04": "iv",
|
||||
"05": "v", "06": "vi", "07": "vii", "08": "viii",
|
||||
"09": "ix", "10": "x", "11": "xi", "12": "xii"}
|
||||
dec2rom_dict = {'01': 'i', '02': 'ii', '03': 'iii', '04': 'iv',
|
||||
'05': 'v', '06': 'vi', '07': 'vii', '08': 'viii',
|
||||
'09': 'ix', '10': 'x', '11': 'xi', '12': 'xii'}
|
||||
|
||||
weekday_dict = {"Sunday": "Niedziela", "Monday": "Poniedziałek", "Tuesday": "Wtorek",
|
||||
"Wednesday": "Środa", "Thursday": "Czwartek", "Friday": "Piątek", "Saturday": "Sobota"}
|
||||
weekday_dict = {'Sunday': 'Niedziela', 'Monday': 'Poniedziałek', 'Tuesday': 'Wtorek',
|
||||
'Wednesday': 'Środa', 'Thursday': 'Czwartek', 'Friday': 'Piątek', 'Saturday': 'Sobota'}
|
||||
|
||||
now = datetime.datetime.now()
|
||||
|
||||
feeds = []
|
||||
for i in range(0, self.next_days):
|
||||
for i in range(self.next_days):
|
||||
url_date = now + datetime.timedelta(days=i)
|
||||
url_date_month = url_date.strftime("%m")
|
||||
url_date_month = url_date.strftime('%m')
|
||||
url_date_month_roman = dec2rom_dict[url_date_month]
|
||||
url_date_day = url_date.strftime("%d")
|
||||
url_date_year = url_date.strftime("%Y")[2:]
|
||||
url_date_weekday = url_date.strftime("%A")
|
||||
url_date_day = url_date.strftime('%d')
|
||||
url_date_year = url_date.strftime('%Y')[2:]
|
||||
url_date_weekday = url_date.strftime('%A')
|
||||
url_date_weekday_pl = weekday_dict[url_date_weekday]
|
||||
|
||||
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + \
|
||||
url_date_year + "/" + url_date_day + url_date_month + "/index.php3"
|
||||
url = ('http://brewiarz.pl/' + url_date_month_roman + '_' +
|
||||
url_date_year + '/' + url_date_day + url_date_month + '/index.php3')
|
||||
articles = self.parse_pages(url)
|
||||
if articles:
|
||||
title = url_date_weekday_pl + " " + url_date_day + \
|
||||
"." + url_date_month + "." + url_date_year
|
||||
title = (url_date_weekday_pl + ' ' + url_date_day +
|
||||
'.' + url_date_month + '.' + url_date_year)
|
||||
feeds.append((title, articles))
|
||||
else:
|
||||
sectors = self.get_sectors(url)
|
||||
for subpage in sectors:
|
||||
title = url_date_weekday_pl + " " + url_date_day + "." + \
|
||||
url_date_month + "." + url_date_year + " - " + subpage.string
|
||||
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + \
|
||||
"/" + url_date_day + url_date_month + \
|
||||
"/" + subpage['href']
|
||||
title = (url_date_weekday_pl + ' ' + url_date_day + '.' +
|
||||
url_date_month + '.' + url_date_year + ' - ' + subpage.string)
|
||||
url = ('http://brewiarz.pl/' + url_date_month_roman + '_' + url_date_year +
|
||||
'/' + url_date_day + url_date_month + '/' + subpage['href'])
|
||||
print(url)
|
||||
articles = self.parse_pages(url)
|
||||
if articles:
|
||||
@ -91,9 +89,8 @@ class brewiarz(BasicNewsRecipe):
|
||||
sublinks = ol.findAll(name='a')
|
||||
for sublink in sublinks:
|
||||
link_title = self.tag_to_string(
|
||||
link) + " - " + self.tag_to_string(sublink)
|
||||
link_url_print = re.sub(
|
||||
'php3', 'php3?kr=_druk&wr=lg&', sublink['href'])
|
||||
link) + ' - ' + self.tag_to_string(sublink)
|
||||
link_url_print = sublink['href'].replace('php3', 'php3?kr=_druk&wr=lg&')
|
||||
link_url = url[:-10] + link_url_print
|
||||
current_articles.append({'title': link_title,
|
||||
'url': link_url, 'description': '', 'date': ''})
|
||||
@ -102,8 +99,7 @@ class brewiarz(BasicNewsRecipe):
|
||||
continue
|
||||
else:
|
||||
link_title = self.tag_to_string(link)
|
||||
link_url_print = re.sub(
|
||||
'php3', 'php3?kr=_druk&wr=lg&', link['href'])
|
||||
link_url_print = link['href'].replace('php3', 'php3?kr=_druk&wr=lg&')
|
||||
link_url = url[:-10] + link_url_print
|
||||
current_articles.append({'title': link_title,
|
||||
'url': link_url, 'description': '', 'date': ''})
|
||||
@ -145,7 +141,7 @@ class brewiarz(BasicNewsRecipe):
|
||||
if x == tag:
|
||||
break
|
||||
else:
|
||||
print("Can't find", tag, "in", tag.parent)
|
||||
print("Can't find", tag, 'in', tag.parent)
|
||||
continue
|
||||
for r in reversed(tag.contents):
|
||||
tag.parent.insert(i, r)
|
||||
|
@ -16,7 +16,7 @@ class AdvancedUserRecipe(BasicNewsRecipe):
|
||||
cover_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
|
||||
masthead_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
|
||||
|
||||
remove_tags = [dict(attrs={'class': ['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}), # noqa
|
||||
remove_tags = [dict(attrs={'class': ['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}), # noqa: E501
|
||||
dict(id=['header', 'artTools', 'context', 'interact',
|
||||
'footer-navigation', 'bwNet', 'copy', 'keyboardNavigationHint']),
|
||||
dict(name=['hjtrs', 'kud'])]
|
||||
|
@ -22,10 +22,10 @@ class Business_insider(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newsportal'
|
||||
masthead_url = 'http://static.businessinsider.com/assets/images/logos/tbi_print.jpg'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
|
@ -15,8 +15,6 @@ class BusinessStandardPrint(BasicNewsRecipe):
|
||||
language = 'en_IN'
|
||||
masthead_url = 'https://bsmedia.business-standard.com/include/_mod/site/html5/images/business-standard-logo.png'
|
||||
encoding = 'utf-8'
|
||||
resolve_internal_links = True
|
||||
remove_empty_feeds = True
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
@ -64,7 +62,7 @@ class BusinessStandardPrint(BasicNewsRecipe):
|
||||
if dt.weekday() == 6:
|
||||
self.log.warn(
|
||||
'Business Standard Does Not Have A Print Publication On Sunday. The Reports'
|
||||
' And Columns On This Page Today Appeared In The Newspaper\'s Saturday Edition.'
|
||||
" And Columns On This Page Today Appeared In The Newspaper's Saturday Edition."
|
||||
)
|
||||
url = 'https://apibs.business-standard.com/category/today-paper?sortBy=' + today
|
||||
raw = self.index_to_soup(url, raw=True)
|
||||
|
@ -90,7 +90,7 @@ class BT(BasicNewsRecipe):
|
||||
|
||||
# Insert feeds in specified order, if available
|
||||
|
||||
feedSort = ['Editor\'s Note', 'Editors note']
|
||||
feedSort = ["Editor's Note", 'Editors note']
|
||||
for i in feedSort:
|
||||
if i in sections:
|
||||
feeds.append((i, sections[i]))
|
||||
@ -98,8 +98,7 @@ class BT(BasicNewsRecipe):
|
||||
# Done with the sorted feeds
|
||||
|
||||
for i in feedSort:
|
||||
if i in sections:
|
||||
del sections[i]
|
||||
sections.pop(i, None)
|
||||
|
||||
# Append what is left over...
|
||||
|
||||
|
@ -5,8 +5,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class CACM(BasicNewsRecipe):
|
||||
title = "ACM CACM Magazine"
|
||||
description = "Published on day 1 of every month."
|
||||
title = 'ACM CACM Magazine'
|
||||
description = 'Published on day 1 of every month.'
|
||||
language = 'en'
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
@ -17,16 +17,16 @@ class CACM(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def get_cover_url(self):
|
||||
"""
|
||||
'''
|
||||
Parse out cover URL from cover page.
|
||||
Example:
|
||||
From: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.large.jpg?1647524668&1647524668
|
||||
Get: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.jpg
|
||||
"""
|
||||
'''
|
||||
|
||||
soup = self.index_to_soup("https://cacm.acm.org/")
|
||||
a_img = soup.find("a", class_="menuCover")
|
||||
img_url = a_img.img["src"]
|
||||
img_url = img_url.split("?")[0]
|
||||
img_url = img_url.replace(".large", "")
|
||||
soup = self.index_to_soup('https://cacm.acm.org/')
|
||||
a_img = soup.find('a', class_='menuCover')
|
||||
img_url = a_img.img['src']
|
||||
img_url = img_url.split('?')[0]
|
||||
img_url = img_url.replace('.large', '')
|
||||
return img_url
|
||||
|
@ -9,8 +9,7 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
||||
title = u'Calcalist'
|
||||
language = 'he'
|
||||
__author__ = 'marbs'
|
||||
extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }' # noqa
|
||||
simultaneous_downloads = 5
|
||||
extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }' # noqa: E501
|
||||
remove_javascript = True
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
oldest_article = 1
|
||||
@ -23,34 +22,33 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class': 'ArticleBodyComponent'}),
|
||||
]
|
||||
remove_tags = [dict(name='p', attrs={'text': [' ']})]
|
||||
max_articles_per_feed = 100
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<p> </p>', re.DOTALL | re.IGNORECASE), lambda match: '')
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u" דף הבית", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml"),
|
||||
(u" 24/7", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml"),
|
||||
(u" באזז", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml"),
|
||||
(u" משפט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml"),
|
||||
(u" רכב", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml"),
|
||||
(u" אחריות וסביבה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml"),
|
||||
(u" דעות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml"),
|
||||
(u" תיירות ותעופה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml"),
|
||||
(u" קריירה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml"),
|
||||
(u" אחד העם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml"),
|
||||
(u" המלצות ואזהרות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml"),
|
||||
(u" הייטק והון סיכון", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml"),
|
||||
(u" חדשות טכנולוגיה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml"),
|
||||
(u" תקשורת", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml"),
|
||||
(u" אינטרנט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml"),
|
||||
(u" מכשירים וגאדג'טים", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml"),
|
||||
(u" המדריך", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml"),
|
||||
(u" אפליקציות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml"),
|
||||
(u" Play", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml"),
|
||||
(u" הכסף", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml"),
|
||||
(u" עולם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml"),
|
||||
(u" פרסום ושיווק", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml"),
|
||||
(u" פנאי", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml"),
|
||||
(u" עסקי ספורט", u"http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml")
|
||||
(u' דף הבית', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml'),
|
||||
(u' 24/7', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml'),
|
||||
(u' באזז', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml'),
|
||||
(u' משפט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml'),
|
||||
(u' רכב', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml'),
|
||||
(u' אחריות וסביבה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml'),
|
||||
(u' דעות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml'),
|
||||
(u' תיירות ותעופה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml'),
|
||||
(u' קריירה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml'),
|
||||
(u' אחד העם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml'),
|
||||
(u' המלצות ואזהרות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml'),
|
||||
(u' הייטק והון סיכון', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml'),
|
||||
(u' חדשות טכנולוגיה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml'),
|
||||
(u' תקשורת', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml'),
|
||||
(u' אינטרנט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml'),
|
||||
(u" מכשירים וגאדג'טים", u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml'),
|
||||
(u' המדריך', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml'),
|
||||
(u' אפליקציות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml'),
|
||||
(u' Play', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml'),
|
||||
(u' הכסף', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml'),
|
||||
(u' עולם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml'),
|
||||
(u' פרסום ושיווק', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml'),
|
||||
(u' פנאי', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml'),
|
||||
(u' עסקי ספורט', u'http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml')
|
||||
]
|
||||
|
@ -60,20 +60,20 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
# un-comment the following six lines for the Vancouver Province
|
||||
# title = u'Vancouver Province'
|
||||
# url_prefix = 'http://www.theprovince.com'
|
||||
# description = u'News from Vancouver, BC'
|
||||
# std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
|
||||
# logo_url = 'vplogo.jpg'
|
||||
# fp_tag = 'CAN_TP'
|
||||
# # title = u'Vancouver Province'
|
||||
# # url_prefix = 'http://www.theprovince.com'
|
||||
# # description = u'News from Vancouver, BC'
|
||||
# # std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
|
||||
# # logo_url = 'vplogo.jpg'
|
||||
# # fp_tag = 'CAN_TP'
|
||||
|
||||
# un-comment the following six lines for the Vancouver Sun
|
||||
# title = u'Vancouver Sun'
|
||||
# url_prefix = 'http://www.vancouversun.com'
|
||||
# description = u'News from Vancouver, BC'
|
||||
# std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
|
||||
# logo_url = 'vslogo.jpg'
|
||||
# fp_tag = 'CAN_VS'
|
||||
# # title = u'Vancouver Sun'
|
||||
# # url_prefix = 'http://www.vancouversun.com'
|
||||
# # description = u'News from Vancouver, BC'
|
||||
# # std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
|
||||
# # logo_url = 'vslogo.jpg'
|
||||
# # fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following six lines for the Calgary Herald
|
||||
title = u'Calgary Herald'
|
||||
@ -90,7 +90,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
# # std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
|
||||
# # logo_url = 'ejlogo.jpg'
|
||||
# # fp_tag = 'CAN_EJ'
|
||||
#
|
||||
|
||||
# un-comment the following six lines for the Ottawa Citizen
|
||||
# # title = u'Ottawa Citizen'
|
||||
# # url_prefix = 'http://www.ottawacitizen.com'
|
||||
@ -98,7 +98,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
# # std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
||||
# # logo_url = 'oclogo.jpg'
|
||||
# # fp_tag = 'CAN_OC'
|
||||
#
|
||||
|
||||
# un-comment the following six lines for the Montreal Gazette
|
||||
# # title = u'Montreal Gazette'
|
||||
# # url_prefix = 'http://www.montrealgazette.com'
|
||||
@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
|
||||
keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
|
||||
|
||||
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
|
||||
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
|
||||
@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
name='div', attrs={'class': 'copyright'}),
|
||||
dict(name='div', attrs={'class': 'rule_grey_solid'}),
|
||||
dict(name='div', attrs={'id': 'soundoff'}),
|
||||
dict(name='div', attrs={'id': re.compile('flyer')}),
|
||||
dict(name='div', attrs={'id': re.compile(r'flyer')}),
|
||||
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
|
||||
|
||||
def get_cover_url(self):
|
||||
@ -154,8 +154,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
except:
|
||||
while daysback < 7:
|
||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg' + \
|
||||
str((date.today() - timedelta(days=daysback)).day) + \
|
||||
'/lg/' + self.fp_tag + '.jpg'
|
||||
str((date.today() - timedelta(days=daysback)).day) + '/lg/' + self.fp_tag + '.jpg'
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(cover)
|
||||
@ -164,24 +163,24 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
continue
|
||||
break
|
||||
if daysback == 7:
|
||||
self.log("\nCover unavailable")
|
||||
self.log('\nCover unavailable')
|
||||
cover = None
|
||||
return cover
|
||||
|
||||
def fixChars(self, string):
|
||||
# Replace lsquo (\x91)
|
||||
fixed = re.sub("\x91", "‘", string)
|
||||
fixed = string.replace('\x91', '‘')
|
||||
# Replace rsquo (\x92)
|
||||
fixed = re.sub("\x92", "’", fixed)
|
||||
fixed = fixed.replace('\x92', '’')
|
||||
# Replace ldquo (\x93)
|
||||
fixed = re.sub("\x93", "“", fixed)
|
||||
fixed = fixed.replace('\x93', '“')
|
||||
# Replace rdquo (\x94)
|
||||
fixed = re.sub("\x94", "”", fixed)
|
||||
fixed = fixed.replace('\x94', '”')
|
||||
# Replace ndash (\x96)
|
||||
fixed = re.sub("\x96", "–", fixed)
|
||||
fixed = fixed.replace('\x96', '–')
|
||||
# Replace mdash (\x97)
|
||||
fixed = re.sub("\x97", "—", fixed)
|
||||
fixed = re.sub("’", "’", fixed)
|
||||
fixed = fixed.replace('\x97', '—')
|
||||
fixed = fixed.replace('’', '’')
|
||||
return fixed
|
||||
|
||||
def massageNCXText(self, description):
|
||||
@ -214,7 +213,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
divtags = soup.findAll('div', attrs={'id': ''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del(div['id'])
|
||||
del div['id']
|
||||
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
@ -262,10 +261,10 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
if url.startswith('/'):
|
||||
url = self.url_prefix + url
|
||||
if not url.startswith(self.url_prefix):
|
||||
print("Rejected " + url)
|
||||
print('Rejected ' + url)
|
||||
return
|
||||
if url in self.url_list:
|
||||
print("Rejected dup " + url)
|
||||
print('Rejected dup ' + url)
|
||||
return
|
||||
self.url_list.append(url)
|
||||
title = self.tag_to_string(atag, False)
|
||||
@ -277,8 +276,8 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
return
|
||||
dtag = adiv.find('div', 'content')
|
||||
description = ''
|
||||
print("URL " + url)
|
||||
print("TITLE " + title)
|
||||
print('URL ' + url)
|
||||
print('TITLE ' + title)
|
||||
if dtag is not None:
|
||||
stag = dtag.span
|
||||
if stag is not None:
|
||||
@ -286,18 +285,18 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
description = self.tag_to_string(stag, False)
|
||||
else:
|
||||
description = self.tag_to_string(dtag, False)
|
||||
print("DESCRIPTION: " + description)
|
||||
print('DESCRIPTION: ' + description)
|
||||
if key not in articles:
|
||||
articles[key] = []
|
||||
articles[key].append(dict(
|
||||
title=title, url=url, date='', description=description, author='', content=''))
|
||||
|
||||
def parse_web_index(key, keyurl):
|
||||
print("Section: " + key + ': ' + self.url_prefix + keyurl)
|
||||
print('Section: ' + key + ': ' + self.url_prefix + keyurl)
|
||||
try:
|
||||
soup = self.index_to_soup(self.url_prefix + keyurl)
|
||||
except:
|
||||
print("Section: " + key + ' NOT FOUND')
|
||||
print('Section: ' + key + ' NOT FOUND')
|
||||
return
|
||||
ans.append(key)
|
||||
mainsoup = soup.find('div', 'bodywrapper')
|
||||
@ -309,7 +308,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
for wdiv in mainsoup.findAll(attrs={'class': ['headline', 'featurecontent']}):
|
||||
handle_article(wdiv, key)
|
||||
|
||||
for (k, url) in self.postmedia_index_pages:
|
||||
for k,url in self.postmedia_index_pages:
|
||||
parse_web_index(k, url)
|
||||
ans = [(key, articles[key]) for key in ans if key in articles]
|
||||
return ans
|
||||
|
@ -4,7 +4,6 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1271446252(BasicNewsRecipe):
|
||||
title = u'CanardPC'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
language = 'fr'
|
||||
__author__ = 'zorgluf'
|
||||
max_articles_per_feed = 25
|
||||
|
@ -17,7 +17,7 @@ class Capital(BasicNewsRecipe):
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),
|
||||
dict(name='p'),
|
||||
dict(name='span', attrs={'id': ["textbody"]})
|
||||
dict(name='span', attrs={'id': ['textbody']})
|
||||
]
|
||||
|
||||
# 3 posts seemed to have utf8 encoding
|
||||
@ -36,6 +36,6 @@ class Capital(BasicNewsRecipe):
|
||||
'http://www.capital.gr/articles/articlesrss.asp?catid=4'),
|
||||
(u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A3\u0399\u03A9\u03A0\u0397\u03A4\u0397\u03A1\u0399\u039F',
|
||||
'http://www.capital.gr/articles/articlesrss.asp?catid=6'),
|
||||
(u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A0\u0399\u03A3\u03A9 \u0391\u03A0\u039F \u03A4\u0399\u03A3 \u0393\u03A1\u0391\u039C\u039C\u0395\u03A3', # noqa
|
||||
(u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A0\u0399\u03A3\u03A9 \u0391\u03A0\u039F \u03A4\u0399\u03A3 \u0393\u03A1\u0391\u039C\u039C\u0395\u03A3', # noqa: E501
|
||||
'http://www.capital.gr/articles/articlesrss.asp?catid=8'),
|
||||
]
|
||||
|
@ -15,6 +15,7 @@ def absurl(x):
|
||||
x = 'https://caravanmagazine.in' + x
|
||||
return x
|
||||
|
||||
|
||||
def safe_dict(data, *names):
|
||||
ans = data
|
||||
for x in names:
|
||||
@ -49,6 +50,7 @@ def parse_body(x):
|
||||
yield from parse_body(p)
|
||||
yield '</p>'
|
||||
|
||||
|
||||
def parse_p(p):
|
||||
if p.get('type', '') == 'text':
|
||||
if 'marks' in p:
|
||||
@ -96,7 +98,7 @@ class CaravanMagazine(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self, *args, **kw)
|
||||
if not self.username or not self.password:
|
||||
return br
|
||||
data = json.dumps({"0":{"json":{"email":self.username,"password":self.password}}})
|
||||
data = json.dumps({'0':{'json':{'email':self.username,'password':self.password}}})
|
||||
if not isinstance(data, bytes):
|
||||
data = data.encode('utf-8')
|
||||
rq = Request(
|
||||
@ -138,7 +140,7 @@ class CaravanMagazine(BasicNewsRecipe):
|
||||
d = self.recipe_specific_options.get('date')
|
||||
if d and isinstance(d, str):
|
||||
x = d.split('-')
|
||||
inp = json.dumps({"0":{"json":{"month":int(x[0]),"year":int(x[1])}}})
|
||||
inp = json.dumps({'0':{'json':{'month':int(x[0]),'year':int(x[1])}}})
|
||||
api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='')
|
||||
|
||||
raw = json.loads(self.index_to_soup(api, raw=True))
|
||||
@ -174,7 +176,7 @@ class CaravanMagazine(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
|
||||
slug = urlparse(url).path
|
||||
inp = json.dumps({"0":{"json":{"slug":slug}}})
|
||||
inp = json.dumps({'0':{'json':{'slug':slug}}})
|
||||
return 'https://api.caravanmagazine.in/api/trpc/articles.getFromCache?batch=1&input=' + quote(inp, safe='')
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
@ -211,6 +213,6 @@ class CaravanMagazine(BasicNewsRecipe):
|
||||
for x in art_cont['premiumContent']:
|
||||
premium_cont += '\n' + ''.join(parse_body(x))
|
||||
|
||||
return '<html><body><div>' \
|
||||
+ cat + title + desc + auth + lede + free_cont + premium_cont + \
|
||||
'</div></body></html>'
|
||||
return ('<html><body><div>'
|
||||
+ cat + title + desc + auth + lede + free_cont + premium_cont +
|
||||
'</div></body></html>')
|
||||
|
@ -5,9 +5,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CATOInstitute(BasicNewsRecipe):
|
||||
title = u'The CATO Institute'
|
||||
description = "The Cato Institute is a public policy research organization — a think tank — \
|
||||
dedicated to the principles of individual liberty, limited government, free markets and peace.\
|
||||
Its scholars and analysts conduct independent, nonpartisan research on a wide range of policy issues."
|
||||
description = ('The Cato Institute is a public policy research organization — a think tank — '
|
||||
'dedicated to the principles of individual liberty, limited government, free markets and peace. '
|
||||
'Its scholars and analysts conduct independent, nonpartisan research on a wide range of policy issues.')
|
||||
__author__ = '_reader'
|
||||
__date__ = '05 July 2012'
|
||||
__version__ = '1.0'
|
||||
|
@ -31,7 +31,6 @@ class CBN(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'en'
|
||||
encoding = 'iso-8859-1'
|
||||
conversion_options = {'linearize_tables': True}
|
||||
|
||||
|
@ -6,7 +6,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Cedar(BasicNewsRecipe):
|
||||
title = u'\u041A\u0435\u0434\u0440'
|
||||
description = u'\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u043E\u0435 \u043C\u0435\u0434\u0438\u0430 \u043E\u0431 \u043E\u043A\u0440\u0443\u0436\u0430\u044E\u0449\u0435\u0439 \u0441\u0440\u0435\u0434\u0435' # noqa
|
||||
description = u'\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u043E\u0435 \u043C\u0435\u0434\u0438\u0430 \u043E\u0431 \u043E\u043A\u0440\u0443\u0436\u0430\u044E\u0449\u0435\u0439 \u0441\u0440\u0435\u0434\u0435' # noqa: E501
|
||||
__author__ = 'bugmen00t'
|
||||
publication_type = 'blog'
|
||||
oldest_article = 30
|
||||
|
@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Cherta(BasicNewsRecipe):
|
||||
title = '\u0427\u0435\u0440\u0442\u0430'
|
||||
__author__ = 'bugmen00t'
|
||||
description = ' \u0418\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u044B\u0435, \u0432\u0430\u0436\u043D\u044B\u0435 \u0438 \u0433\u043B\u0443\u0431\u043E\u043A\u0438\u0435 \u0442\u0435\u043A\u0441\u0442\u044B \u043F\u0440\u043E \u043D\u0430\u0441\u0438\u043B\u0438\u0435 \u0438 \u043D\u0435\u0440\u0430\u0432\u0435\u043D\u0441\u0442\u0432\u043E \u0432 \u0420\u043E\u0441\u0441\u0438\u0438.' # noqa
|
||||
description = ' \u0418\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u044B\u0435, \u0432\u0430\u0436\u043D\u044B\u0435 \u0438 \u0433\u043B\u0443\u0431\u043E\u043A\u0438\u0435 \u0442\u0435\u043A\u0441\u0442\u044B \u043F\u0440\u043E \u043D\u0430\u0441\u0438\u043B\u0438\u0435 \u0438 \u043D\u0435\u0440\u0430\u0432\u0435\u043D\u0441\u0442\u0432\u043E \u0432 \u0420\u043E\u0441\u0441\u0438\u0438.' # noqa: E501
|
||||
publisher = 'cherta.media'
|
||||
category = 'blog'
|
||||
cover_url = u'https://cherta.media/wp-content/uploads/2022/01/cherta_snippet2.png'
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user