diff --git a/icons/icns/make_iconsets.py b/icons/icns/make_iconsets.py index c9bf6f88dc..aff3f2fa72 100644 --- a/icons/icns/make_iconsets.py +++ b/icons/icns/make_iconsets.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# vim:fileencoding=utf-8 __license__ = 'GPL v3' @@ -27,7 +26,7 @@ for name, src in sources.items(): os.chdir(iconset) try: for sz in (16, 32, 128, 256, 512, 1024): - iname = 'icon_{0}x{0}.png'.format(sz) + iname = f'icon_{sz}x{sz}.png' iname2x = 'icon_{0}x{0}@2x.png'.format(sz // 2) if src.endswith('.svg'): subprocess.check_call(['rsvg-convert', src, '-w', str(sz), '-h', str(sz), '-o', iname]) @@ -36,7 +35,7 @@ for name, src in sources.items(): if sz == 512: shutil.copy2(src, iname) else: - subprocess.check_call(['convert', src, '-resize', '{0}x{0}'.format(sz), iname]) + subprocess.check_call(['convert', src, '-resize', f'{sz}x{sz}', iname]) if sz > 16: shutil.copy2(iname, iname2x) if sz > 512: diff --git a/icons/make_ico_files.py b/icons/make_ico_files.py index 179d7606db..d8b97ba8ae 100644 --- a/icons/make_ico_files.py +++ b/icons/make_ico_files.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# vim:fileencoding=utf-8 __license__ = 'GPL v3' @@ -24,7 +23,7 @@ for name, src in sources.items(): try: names = [] for sz in (16, 24, 32, 48, 64, 256): - iname = os.path.join('ico_temp', '{0}x{0}.png'.format(sz)) + iname = os.path.join('ico_temp', f'{sz}x{sz}.png') subprocess.check_call(['rsvg-convert', src, '-w', str(sz), '-h', str(sz), '-o', iname]) subprocess.check_call(['optipng', '-o7', '-strip', 'all', iname]) if sz >= 128: diff --git a/imgsrc/generate.py b/imgsrc/generate.py index 9adaeab79f..8d0e0c5426 100755 --- a/imgsrc/generate.py +++ b/imgsrc/generate.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# vim:fileencoding=utf-8 # License: GPLv3 Copyright: 2016, Kovid Goyal import argparse diff --git a/imgsrc/render-logo.py b/imgsrc/render-logo.py index 82d85072ae..0403760849 100755 --- a/imgsrc/render-logo.py +++ b/imgsrc/render-logo.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# vim:fileencoding=utf-8 # License: GPLv3 Copyright: 2016, Kovid Goyal diff --git a/imgsrc/srv/generate.py b/imgsrc/srv/generate.py index c56f3140f6..f2600811ba 100644 --- a/imgsrc/srv/generate.py +++ b/imgsrc/srv/generate.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# vim:fileencoding=utf-8 # License: GPLv3 Copyright: 2016, Kovid Goyal @@ -27,7 +26,7 @@ def clone_node(node, parent): def merge(): base = os.path.dirname(os.path.abspath(__file__)) ans = etree.fromstring( - '' % (SVG_NS, XLINK_NS), + f'', parser=etree.XMLParser( recover=True, no_network=True, resolve_entities=False ) @@ -43,14 +42,14 @@ def merge(): recover=True, no_network=True, resolve_entities=False ) ) - symbol = ans.makeelement('{%s}symbol' % SVG_NS) + symbol = ans.makeelement('{%s}symbol' % SVG_NS) # noqa: UP031 symbol.set('viewBox', svg.get('viewBox')) symbol.set('id', 'icon-' + f.rpartition('.')[0]) for child in svg.iterchildren('*'): clone_node(child, symbol) ans.append(symbol) ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False) - ans = re.sub(']+>', '', ans, count=1) + ans = re.sub(r']+>', '', ans, count=1) return ans diff --git a/manual/build.py b/manual/build.py index f90eaf7b7a..e0dd62b30b 100755 --- a/manual/build.py +++ b/manual/build.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# vim:fileencoding=utf-8 __license__ = 'GPL v3' diff --git a/manual/conf.py b/manual/conf.py index 98ac86e9ae..33a0efcdcf 100644 --- a/manual/conf.py +++ b/manual/conf.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -# # calibre documentation build 
configuration file, created by # sphinx-quickstart.py on Sun Mar 23 01:23:55 2008. # @@ -47,11 +45,11 @@ templates_path = ['templates'] source_suffix = {'.rst': 'restructuredtext'} # The master toctree document. -master_doc = 'index' if tags.has('online') else 'simple_index' # noqa +master_doc = 'index' if tags.has('online') else 'simple_index' # noqa: F821 # kill the warning about index/simple_index not being in a toctree exclude_patterns = ['simple_index.rst'] if master_doc == 'index' else ['index.rst'] exclude_patterns.append('cli-options-header.rst') -if tags.has('gettext'): # noqa +if tags.has('gettext'): # noqa: F821 # Do not exclude anything as the strings must be translated. This will # generate a warning about the documents not being in a toctree, just ignore # it. @@ -64,7 +62,7 @@ language = os.environ.get('CALIBRE_OVERRIDE_LANG', 'en') def generated_langs(): try: return os.listdir(os.path.join(base, 'generated')) - except EnvironmentError as e: + except OSError as e: if e.errno != errno.ENOENT: raise return () @@ -99,13 +97,13 @@ today_fmt = '%B %d, %Y' unused_docs = ['global', 'cli/global'] locale_dirs = ['locale/'] -title = '%s User Manual' % __appname__ +title = f'{__appname__} User Manual' needs_localization = language not in {'en', 'eng'} if needs_localization: import gettext try: t = gettext.translation('simple_index', locale_dirs[0], [language]) - except IOError: + except OSError: pass else: title = t.gettext(title) @@ -176,7 +174,7 @@ def sort_languages(x): lc, name = x if lc == language: return '' - return sort_key(type(u'')(name)) + return sort_key(str(name)) website = 'https://calibre-ebook.com' @@ -193,13 +191,13 @@ extlinks = { } del sort_languages, get_language -epub_author = u'Kovid Goyal' -epub_publisher = u'Kovid Goyal' -epub_copyright = u'© {} Kovid Goyal'.format(date.today().year) -epub_description = u'Comprehensive documentation for calibre' -epub_identifier = u'https://manual.calibre-ebook.com' -epub_scheme = u'url' -epub_uid = u'S54a88f8e9d42455e9c6db000e989225f' +epub_author = 'Kovid Goyal' +epub_publisher = 'Kovid Goyal' +epub_copyright = f'© {date.today().year} Kovid Goyal' +epub_description = 'Comprehensive documentation for calibre' +epub_identifier = 'https://manual.calibre-ebook.com' +epub_scheme = 'url' +epub_uid = 'S54a88f8e9d42455e9c6db000e989225f' epub_tocdepth = 4 epub_tocdup = True epub_cover = ('epub_cover.jpg', 'epub_cover_template.html') @@ -255,5 +253,5 @@ latex_show_pagerefs = True latex_show_urls = 'footnote' latex_elements = { 'papersize':'letterpaper', - 'preamble': r'\renewcommand{\pageautorefname}{%s}' % _('page'), + 'preamble': r'\renewcommand{\pageautorefname}{%s}' % _('page'), # noqa: UP031 } diff --git a/manual/custom.py b/manual/custom.py index ee8f429cdd..1da37118c9 100644 --- a/manual/custom.py +++ b/manual/custom.py @@ -33,7 +33,7 @@ def formatter_funcs(): ans = {'doc': {}, 'sum': {}} with TemporaryDirectory() as tdir: - db = LibraryDatabase(tdir) # needed to load formatter_funcs + db = LibraryDatabase(tdir) # needed to load formatter_funcs ffml = FFMLProcessor() all_funcs = formatter_functions().get_builtins() for func_name, func in all_funcs.items(): @@ -195,13 +195,13 @@ details and examples. lines = [] for cmd in COMMANDS: parser = option_parser_for(cmd)() - lines += ['.. _calibredb-%s-%s:' % (language, cmd), ''] + lines += [f'.. 
_calibredb-{language}-{cmd}:', ''] lines += [cmd, '~'*20, ''] usage = parser.usage.strip() - usage = [i for i in usage.replace('%prog', 'calibredb').splitlines()] + usage = usage.replace('%prog', 'calibredb').splitlines() cmdline = ' '+usage[0] usage = usage[1:] - usage = [re.sub(r'(%s)([^a-zA-Z0-9])'%cmd, r':command:`\1`\2', i) for i in usage] + usage = [re.sub(rf'({cmd})([^a-zA-Z0-9])', r':command:`\1`\2', i) for i in usage] lines += ['.. code-block:: none', '', cmdline, ''] lines += usage groups = [(None, None, parser.option_list)] @@ -240,14 +240,14 @@ def generate_ebook_convert_help(preamble, app): parser, plumber = create_option_parser(['ebook-convert', 'dummyi.'+sorted(pl.file_types)[0], 'dummyo.epub', '-h'], default_log) groups = [(pl.name+ ' Options', '', g.option_list) for g in - parser.option_groups if g.title == "INPUT OPTIONS"] + parser.option_groups if g.title == 'INPUT OPTIONS'] prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-')) raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True)) for pl in sorted(output_format_plugins(), key=lambda x: x.name): parser, plumber = create_option_parser(['ebook-convert', 'd.epub', 'dummyi.'+pl.file_type, '-h'], default_log) groups = [(pl.name+ ' Options', '', g.option_list) for g in - parser.option_groups if g.title == "OUTPUT OPTIONS"] + parser.option_groups if g.title == 'OUTPUT OPTIONS'] prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-')) raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True)) @@ -257,7 +257,7 @@ def generate_ebook_convert_help(preamble, app): def update_cli_doc(name, raw, language): if isinstance(raw, bytes): raw = raw.decode('utf-8') - path = 'generated/%s/%s.rst' % (language, name) + path = f'generated/{language}/{name}.rst' old_raw = open(path, encoding='utf-8').read() if os.path.exists(path) else '' if not os.path.exists(path) or old_raw != raw: import difflib @@ -352,7 +352,7 @@ def cli_docs(language): usage = [mark_options(i) for i in parser.usage.replace('%prog', cmd).splitlines()] cmdline = usage[0] usage = usage[1:] - usage = [i.replace(cmd, ':command:`%s`'%cmd) for i in usage] + usage = [i.replace(cmd, f':command:`{cmd}`') for i in usage] usage = '\n'.join(usage) preamble = CLI_PREAMBLE.format(cmd=cmd, cmdref=cmd + '-' + language, cmdline=cmdline, usage=usage) if cmd == 'ebook-convert': @@ -382,7 +382,7 @@ def template_docs(language): def localized_path(app, langcode, pagename): href = app.builder.get_target_uri(pagename) - href = re.sub(r'generated/[a-z]+/', 'generated/%s/' % langcode, href) + href = re.sub(r'generated/[a-z]+/', f'generated/{langcode}/', href) prefix = '/' if langcode != 'en': prefix += langcode + '/' @@ -397,7 +397,7 @@ def add_html_context(app, pagename, templatename, context, *args): def guilabel_role(typ, rawtext, text, *args, **kwargs): from sphinx.roles import GUILabel - text = text.replace(u'->', u'\N{THIN SPACE}\N{RIGHTWARDS ARROW}\N{THIN SPACE}') + text = text.replace('->', '\N{THIN SPACE}\N{RIGHTWARDS ARROW}\N{THIN SPACE}') return GUILabel()(typ, rawtext, text, *args, **kwargs) @@ -405,7 +405,7 @@ def setup_man_pages(app): documented_cmds = get_cli_docs()[0] man_pages = [] for cmd, option_parser in documented_cmds: - path = 'generated/%s/%s' % (app.config.language, cmd) + path = f'generated/{app.config.language}/{cmd}' man_pages.append(( path, cmd, cmd, 'Kovid Goyal', 1 )) diff --git a/manual/epub.py b/manual/epub.py index 9fa088bc78..3e08ac5dc8 100644 --- a/manual/epub.py +++ b/manual/epub.py @@ -49,8 +49,8 @@ class 
EPUBHelpBuilder(EpubBuilder): imgname = container.href_to_name(img.get('src'), name) fmt, width, height = identify(container.raw_data(imgname)) if width == -1: - raise ValueError('Failed to read size of: %s' % imgname) - img.set('style', 'width: %dpx; height: %dpx' % (width, height)) + raise ValueError(f'Failed to read size of: {imgname}') + img.set('style', f'width: {width}px; height: {height}px') def fix_opf(self, container): spine_names = {n for n, l in container.spine_names} @@ -75,7 +75,7 @@ class EPUBHelpBuilder(EpubBuilder): # Ensure that the cover-image property is set cover_id = rmap['_static/' + self.config.epub_cover[0]] - for item in container.opf_xpath('//opf:item[@id="{}"]'.format(cover_id)): + for item in container.opf_xpath(f'//opf:item[@id="{cover_id}"]'): item.set('properties', 'cover-image') for item in container.opf_xpath('//opf:item[@href="epub-cover.xhtml"]'): item.set('properties', 'svg calibre:title-page') diff --git a/manual/plugin_examples/editor_demo/main.py b/manual/plugin_examples/editor_demo/main.py index fecdc4dd91..3da2fb442d 100644 --- a/manual/plugin_examples/editor_demo/main.py +++ b/manual/plugin_examples/editor_demo/main.py @@ -32,7 +32,7 @@ class DemoTool(Tool): def create_action(self, for_toolbar=True): # Create an action, this will be added to the plugins toolbar and # the plugins menu - ac = QAction(get_icons('images/icon.png'), 'Magnify fonts', self.gui) # noqa + ac = QAction(get_icons('images/icon.png'), 'Magnify fonts', self.gui) # noqa: F821 if not for_toolbar: # Register a keyboard shortcut for this toolbar action. We only # register it for the action created for the menu, not the toolbar, diff --git a/manual/plugin_examples/helloworld/__init__.py b/manual/plugin_examples/helloworld/__init__.py index 60b3cb570b..07647b2d5f 100644 --- a/manual/plugin_examples/helloworld/__init__.py +++ b/manual/plugin_examples/helloworld/__init__.py @@ -13,13 +13,13 @@ from calibre.customize import FileTypePlugin class HelloWorld(FileTypePlugin): - name = 'Hello World Plugin' # Name of the plugin + name = 'Hello World Plugin' # Name of the plugin description = 'Set the publisher to Hello World for all new conversions' - supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on - author = 'Acme Inc.' # The author of this plugin + supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on + author = 'Acme Inc.' 
# The author of this plugin version = (1, 0, 0) # The version number of this plugin - file_types = {'epub', 'mobi'} # The file types that this plugin will be applied to - on_postprocess = True # Run this plugin after conversion is complete + file_types = {'epub', 'mobi'} # The file types that this plugin will be applied to + on_postprocess = True # Run this plugin after conversion is complete minimum_calibre_version = (0, 7, 53) def run(self, path_to_ebook): diff --git a/manual/plugin_examples/interface_demo/__init__.py b/manual/plugin_examples/interface_demo/__init__.py index f6733549f2..1a9df67b43 100644 --- a/manual/plugin_examples/interface_demo/__init__.py +++ b/manual/plugin_examples/interface_demo/__init__.py @@ -76,5 +76,3 @@ class InterfacePluginDemo(InterfaceActionBase): ac = self.actual_plugin_ if ac is not None: ac.apply_settings() - - diff --git a/manual/plugin_examples/interface_demo/main.py b/manual/plugin_examples/interface_demo/main.py index c3c35f714f..2e6fd25df7 100644 --- a/manual/plugin_examples/interface_demo/main.py +++ b/manual/plugin_examples/interface_demo/main.py @@ -55,7 +55,7 @@ class DemoDialog(QDialog): self.l.addWidget(self.view_button) self.update_metadata_button = QPushButton( - 'Update metadata in a book\'s files', self) + "Update metadata in a book's files", self) self.update_metadata_button.clicked.connect(self.update_metadata) self.l.addWidget(self.update_metadata_button) diff --git a/manual/sidebar_toc.py b/manual/sidebar_toc.py index b399c130b3..b1488f2874 100644 --- a/manual/sidebar_toc.py +++ b/manual/sidebar_toc.py @@ -54,8 +54,8 @@ class checkbox(nodes.Element): def visit_checkbox(self, node): cid = node['ids'][0] node['classes'] = [] - self.body.append('' - ''.format(cid)) + self.body.append(f'' + f'') def modify_li(li): @@ -66,7 +66,7 @@ def modify_li(li): li['classes'].append('leaf-node') else: c = checkbox() - c['ids'] = ['collapse-checkbox-{}'.format(next(id_counter))] + c['ids'] = [f'collapse-checkbox-{next(id_counter)}'] li.insert(0, c) diff --git a/manual/template_ref_generate.py b/manual/template_ref_generate.py index ba64235d75..0aac5a924e 100644 --- a/manual/template_ref_generate.py +++ b/manual/template_ref_generate.py @@ -61,7 +61,7 @@ def generate_template_language_help(language, log): a = output.append with TemporaryDirectory() as tdir: - db = LibraryDatabase(tdir) # needed to load formatter_funcs + db = LibraryDatabase(tdir) # needed to load formatter_funcs ffml = FFMLProcessor() all_funcs = formatter_functions().get_builtins() categories = defaultdict(dict) @@ -89,5 +89,6 @@ def generate_template_language_help(language, log): a(POSTAMBLE) return ''.join(output) + if __name__ == '__main__': generate_template_language_help() diff --git a/pyproject.toml b/pyproject.toml index e16c31d869..c6bc3082fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,10 +21,11 @@ quote-style = 'single' [tool.ruff.lint] ignore = ['E402', 'E722', 'E741'] -select = ['E', 'F', 'I', 'W', 'INT'] +select = ['E', 'F', 'I', 'W', 'INT', 'PIE794'] +unfixable = ['PIE794'] [tool.ruff.lint.per-file-ignores] -"src/calibre/ebooks/unihandecode/*codepoints.py" = ['E501', 'W191'] +"src/calibre/ebooks/unihandecode/*codepoints.py" = ['E501'] "src/qt/*.py" = ['I'] "src/qt/*.pyi" = ['I'] diff --git a/recipes/1843.recipe b/recipes/1843.recipe index df4b16f452..d7c79c7122 100644 --- a/recipes/1843.recipe +++ b/recipes/1843.recipe @@ -17,6 +17,7 @@ from calibre.web.feeds.news import BasicNewsRecipe use_archive = True + def E(parent, name, text='', **attrs): ans = 
parent.makeelement(name, **attrs) ans.text = text @@ -60,8 +61,8 @@ if use_archive: data = json.loads(raw) body = root.xpath('//body')[0] article = E(body, 'article') - E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;') - E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '') + E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;') + E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '') E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;') try: date = data['dateModified'] @@ -95,7 +96,7 @@ else: for child in tuple(body): body.remove(child) article = E(body, 'article') - E(article, 'div', replace_entities(data['subheadline']) , style='color: red; font-size:small; font-weight:bold;') + E(article, 'div', replace_entities(data['subheadline']), style='color: red; font-size:small; font-weight:bold;') E(article, 'h1', replace_entities(data['headline'])) E(article, 'div', replace_entities(data['description']), style='font-style: italic; color:#202020;') if data['dateline'] is None: @@ -157,7 +158,7 @@ class Economist(BasicNewsRecipe): encoding = 'utf-8' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' - __author__ = "Kovid Goyal" + __author__ = 'Kovid Goyal' description = ( 'Published since September 1843 to take part in “a severe contest between intelligence, which presses forward, and ' 'an unworthy, timid ignorance obstructing our progress.”' @@ -170,7 +171,7 @@ class Economist(BasicNewsRecipe): resolve_internal_links = True remove_tags = [ dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']), - dict(attrs={'aria-label': "Article Teaser"}), + dict(attrs={'aria-label': 'Article Teaser'}), dict(attrs={ 'class': [ 'dblClkTrk', 'ec-article-info', 'share_inline_header', @@ -224,13 +225,13 @@ class Economist(BasicNewsRecipe): def parse_index(self): # return self.economist_test_article() soup = self.index_to_soup('https://www.economist.com/hidden-content/1843magazine-hub') - script_tag = soup.find("script", id="__NEXT_DATA__") + script_tag = soup.find('script', id='__NEXT_DATA__') if script_tag is None: raise ValueError('No script tag with JSON data found in the weeklyedition archive') data = json.loads(script_tag.string) content_id = data['props']['pageProps']['content'][0]['tegID'].split('/')[-1] query = { - 'query': 'query HubsDataQuery($id:String!$size:Int!){canonical(ref:$id){id headline description url{canonical __typename}image{ident{url{canonical __typename}width height __typename}__typename}text(mode:"hub" format:"json")hasPart(size:$size){parts{id title:headline isPartOf{context{title:headline __typename}__typename}hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}__typename}}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id 
title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa + 'query': 'query HubsDataQuery($id:String!$size:Int!){canonical(ref:$id){id headline description url{canonical __typename}image{ident{url{canonical __typename}width height __typename}__typename}text(mode:"hub" format:"json")hasPart(size:$size){parts{id title:headline isPartOf{context{title:headline __typename}__typename}hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}__typename}}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa: E501 'operationName': 'HubsDataQuery', 'variables': '{{"id":"/content/{}","size":40}}'.format(content_id), } @@ -247,22 +248,22 @@ class Economist(BasicNewsRecipe): self.description = data['description'] feeds_dict = defaultdict(list) - for part in safe_dict(data, "hasPart", "parts"): + for part in safe_dict(data, 'hasPart', 'parts'): section = part['title'] self.log(section) - for art in safe_dict(part, "hasPart", "parts"): - title = safe_dict(art, "title") - desc = safe_dict(art, "rubric") or '' - sub = safe_dict(art, "flyTitle") or '' + for art in safe_dict(part, 'hasPart', 'parts'): + title = safe_dict(art, 'title') + desc = safe_dict(art, 'rubric') or '' + sub = safe_dict(art, 'flyTitle') or '' if sub and section != sub: desc = sub + ' :: ' + desc pt = PersistentTemporaryFile('.html') pt.write(json.dumps(art).encode('utf-8')) pt.close() url = 'file:///' + pt.name - feeds_dict[section].append({"title": title, "url": url, "description": desc}) + feeds_dict[section].append({'title': title, 'url': url, 'description': desc}) self.log('\t', title, '\n\t\t', desc) - return [(section, 
articles) for section, articles in feeds_dict.items()] + return list(feeds_dict.items()) def populate_article_metadata(self, article, soup, first): article.url = soup.find('h1')['title'] @@ -273,7 +274,7 @@ class Economist(BasicNewsRecipe): 'economist.com/cdn-cgi/image/width=600,quality=80,format=auto/') return soup - else: # Load articles from individual article pages {{{ + else: # Load articles from individual article pages {{{ def __init__(self, *args, **kwargs): BasicNewsRecipe.__init__(self, *args, **kwargs) @@ -311,26 +312,26 @@ class Economist(BasicNewsRecipe): return ans def economist_parse_index(self, soup): - script_tag = soup.find("script", id="__NEXT_DATA__") + script_tag = soup.find('script', id='__NEXT_DATA__') if script_tag is not None: data = json.loads(script_tag.string) # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) - self.title = safe_dict(data, "props", "pageProps", "content", "headline") + self.title = safe_dict(data, 'props', 'pageProps', 'content', 'headline') # self.cover_url = 'https://mma.prnewswire.com/media/2275620/The_Economist_The_World_Ahead_2024.jpg?w=600' feeds = [] - for coll in safe_dict(data, "props", "pageProps", "content", "collections"): - section = safe_dict(coll, "headline") or '' + for coll in safe_dict(data, 'props', 'pageProps', 'content', 'collections'): + section = safe_dict(coll, 'headline') or '' self.log(section) articles = [] - for part in safe_dict(coll, "hasPart", "parts"): - title = safe_dict(part, "headline") or '' - url = safe_dict(part, "url", "canonical") or '' + for part in safe_dict(coll, 'hasPart', 'parts'): + title = safe_dict(part, 'headline') or '' + url = safe_dict(part, 'url', 'canonical') or '' if not title or not url: continue - desc = safe_dict(part, "description") or '' - sub = safe_dict(part, "subheadline") or '' + desc = safe_dict(part, 'description') or '' + sub = safe_dict(part, 'subheadline') or '' if sub: desc = sub + ' :: ' + desc self.log('\t', title, '\n\t', desc, '\n\t\t', url) @@ -341,7 +342,6 @@ class Economist(BasicNewsRecipe): # }}} - def preprocess_raw_html(self, raw, url): # open('/t/raw.html', 'wb').write(raw.encode('utf-8')) if use_archive: @@ -358,9 +358,9 @@ class Economist(BasicNewsRecipe): cleanup_html_article(root) if '/interactive/' in url: - return '
<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' \ - + 'This article is supposed to be read in a browser' \ - + '</em></article></body></html>' + return ('<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' + 'This article is supposed to be read in a browser.' + '</em></article></body></html>
') for div in root.xpath('//div[@class="lazy-image"]'): noscript = list(div.iter('noscript')) diff --git a/recipes/20_minutos.recipe b/recipes/20_minutos.recipe index cb75c2e71a..31c8ac0a76 100644 --- a/recipes/20_minutos.recipe +++ b/recipes/20_minutos.recipe @@ -36,22 +36,21 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe): remove_empty_feeds = True keep_only_tags = [ - dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']}) # noqa + dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']}) # noqa: E501 ] remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']}) - remove_tags_after = dict( - name='div', attrs={'class': ['related-news', 'col']}) + remove_tags_after = dict(name='div', attrs={'class': ['related-news', 'col']}) remove_tags = [ - dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict( name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa + dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict(name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa: E501 ] - extra_css = """ - p{text-align: justify; font-size: 100%} - body{ text-align: left; font-size:100% } - h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } - """ + extra_css = ''' + p{text-align: justify; font-size: 100%} + body{ text-align: left; font-size:100% } + h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } + ''' preprocess_regexps = [(re.compile( r'', re.DOTALL), lambda m: '')] diff --git a/recipes/20minutes.recipe b/recipes/20minutes.recipe index 0890b6f2c3..4baa4e1cd6 100644 --- a/recipes/20minutes.recipe +++ b/recipes/20minutes.recipe @@ -9,8 +9,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe def classes(classes): q = frozenset(classes.split(' ')) - return dict(attrs={ - 'class': lambda x: x and frozenset(x.split()).intersection(q)}) + return dict(attrs={'class': lambda x: x and 
frozenset(x.split()).intersection(q)}) class Minutes(BasicNewsRecipe): diff --git a/recipes/DrawAndCook.recipe b/recipes/DrawAndCook.recipe index 95a74b8cfa..7916fe512b 100644 --- a/recipes/DrawAndCook.recipe +++ b/recipes/DrawAndCook.recipe @@ -28,7 +28,7 @@ class DrawAndCook(BasicNewsRecipe): def parse_index(self): feeds = [] for title, url in [ - ("They Draw and Cook", "http://www.theydrawandcook.com/") + ('They Draw and Cook', 'http://www.theydrawandcook.com/') ]: articles = self.make_links(url) if articles: diff --git a/recipes/TheMITPressReader.recipe b/recipes/TheMITPressReader.recipe index 84417ce803..c64226e771 100644 --- a/recipes/TheMITPressReader.recipe +++ b/recipes/TheMITPressReader.recipe @@ -5,11 +5,11 @@ from calibre.web.feeds.news import BasicNewsRecipe class TheMITPressReader(BasicNewsRecipe): - title = "The MIT Press Reader" + title = 'The MIT Press Reader' __author__ = 'yodha8' language = 'en' - description = ("Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors." - " This recipe pulls articles from the past 7 days.") + description = ('Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors.' + ' This recipe pulls articles from the past 7 days.') oldest_article = 7 max_articles_per_feed = 100 auto_cleanup = True diff --git a/recipes/abc_au.recipe b/recipes/abc_au.recipe index 82591d1357..82662f0351 100644 --- a/recipes/abc_au.recipe +++ b/recipes/abc_au.recipe @@ -38,9 +38,9 @@ class ABCNews(BasicNewsRecipe): if d and isinstance(d, str): self.oldest_article = float(d) -# auto_cleanup = True # enable this as a backup option if recipe stops working + # auto_cleanup = True # enable this as a backup option if recipe stops working -# use_embedded_content = False # if set to true will assume that all the article content is within the feed (i.e. won't try to fetch more data) + # use_embedded_content = False # if set to true will assume that all the article content is within the feed (i.e. 
won't try to fetch more data) no_stylesheets = True remove_javascript = True @@ -57,7 +57,7 @@ class ABCNews(BasicNewsRecipe): # Clear out all the unwanted html tags: # ************************************ remove_tags = [ -# dict(name='aside', attrs={'name': re.compile(aside_reg_exp, re.IGNORECASE)}) + # dict(name='aside', attrs={'name': re.compile(aside_reg_exp, re.IGNORECASE)}) { 'name': ['meta', 'link', 'noscript', 'aside'] }, @@ -98,12 +98,12 @@ class ABCNews(BasicNewsRecipe): ('Health', 'https://www.abc.net.au/news/feed/9167762/rss.xml'), ('Arts and Entertainment', 'https://www.abc.net.au/news/feed/472/rss.xml'), ('Fact Check', 'https://www.abc.net.au/news/feed/5306468/rss.xml'), -# ('Adelaide', 'https://www.abc.net.au/news/feed/8057540/rss.xml'), #enable by removing # at start of line -# ('Brisbane', 'https://www.abc.net.au/news/feed/8053540/rss.xml'), #enable by removing # at start of line -# ('Canberra', 'https://www.abc.net.au/news/feed/8057234/rss.xml'), #enable by removing # at start of line -# ('Darwin', 'https://www.abc.net.au/news/feed/8057648/rss.xml'), #enable by removing # at start of line -# ('Hobart', 'https://www.abc.net.au/news/feed/8054562/rss.xml'), #enable by removing # at start of line -# ('Melbourne', 'https://www.abc.net.au/news/feed/8057136/rss.xml'), #enable by removing # at start of line -# ('Perth', 'https://www.abc.net.au/news/feed/8057096/rss.xml'), #enable by removing # at start of line -# ('Sydney', 'https://www.abc.net.au/news/feed/8055316/rss.xml'), #enable by removing # at start of line + # ('Adelaide', 'https://www.abc.net.au/news/feed/8057540/rss.xml'), #enable by removing # at start of line + # ('Brisbane', 'https://www.abc.net.au/news/feed/8053540/rss.xml'), #enable by removing # at start of line + # ('Canberra', 'https://www.abc.net.au/news/feed/8057234/rss.xml'), #enable by removing # at start of line + # ('Darwin', 'https://www.abc.net.au/news/feed/8057648/rss.xml'), #enable by removing # at start of line + # ('Hobart', 'https://www.abc.net.au/news/feed/8054562/rss.xml'), #enable by removing # at start of line + # ('Melbourne', 'https://www.abc.net.au/news/feed/8057136/rss.xml'), #enable by removing # at start of line + # ('Perth', 'https://www.abc.net.au/news/feed/8057096/rss.xml'), #enable by removing # at start of line + # ('Sydney', 'https://www.abc.net.au/news/feed/8055316/rss.xml'), #enable by removing # at start of line ] diff --git a/recipes/abc_es.recipe b/recipes/abc_es.recipe index f5b036b359..0f902a0202 100644 --- a/recipes/abc_es.recipe +++ b/recipes/abc_es.recipe @@ -47,13 +47,13 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe): if d and isinstance(d, str): self.oldest_article = float(d) - extra_css = """ + extra_css = ''' p{text-align: justify; font-size: 100%} body{ text-align: left; font-size:100% } h3{font-family: sans-serif; font-size:120%; font-weight:bold; text-align: justify; } h2{font-family: sans-serif; font-size:100%; font-weight:bold; text-align: justify; } h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } - """ + ''' feeds = [ diff --git a/recipes/acrimed.recipe b/recipes/acrimed.recipe index da796681e4..d3229562bd 100644 --- a/recipes/acrimed.recipe +++ b/recipes/acrimed.recipe @@ -28,6 +28,6 @@ class Acrimed(BasicNewsRecipe): lambda m: '' + m.group(1) + ''), (re.compile(r'
<h1>(.*) - Acrimed \| Action Critique M.*dias</h1>'), lambda m: '<h1>' + m.group(1) + '</h1>
')] - extra_css = """ + extra_css = ''' .chapo{font-style:italic; margin: 1em 0 0.5em} - """ + ''' diff --git a/recipes/ad.recipe b/recipes/ad.recipe index a8f69b107d..1164c480a5 100644 --- a/recipes/ad.recipe +++ b/recipes/ad.recipe @@ -85,9 +85,10 @@ class ADRecipe(BasicNewsRecipe): def print_version(self, url): parts = url.split('/') - print_url = 'http://' + parts[2] + '/' + parts[3] + '/' + parts[4] + '/' + parts[5] + '/' \ - + parts[10] + '/' + parts[7] + '/print/' + \ - parts[8] + '/' + parts[9] + '/' + parts[13] + print_url = 'http://' + '/'.join([ + parts[2], parts[3], parts[4], parts[5], parts[10], + parts[7], 'print', parts[8], parts[9], parts[13], + ]) return print_url diff --git a/recipes/adevarul.recipe b/recipes/adevarul.recipe index e013497b88..bea42e0ca6 100644 --- a/recipes/adevarul.recipe +++ b/recipes/adevarul.recipe @@ -33,7 +33,7 @@ class Adevarul(BasicNewsRecipe): ] remove_tags = [ - dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'}) # noqa + dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'}) # noqa: E501 ] remove_tags_after = [ diff --git a/recipes/adventuregamers.recipe b/recipes/adventuregamers.recipe index e1c5ddfd20..7a019c1512 100644 --- a/recipes/adventuregamers.recipe +++ b/recipes/adventuregamers.recipe @@ -21,7 +21,7 @@ class AdventureGamers(BasicNewsRecipe): remove_javascript = True use_embedded_content = False INDEX = u'http://www.adventuregamers.com' - extra_css = """ + extra_css = ''' .pageheader_type{font-size: x-large; font-weight: bold; color: #828D74} .pageheader_title,.page_title{font-size: xx-large; color: #394128} .pageheader_byline{font-size: small; font-weight: bold; color: #394128} @@ -32,7 +32,7 @@ class AdventureGamers(BasicNewsRecipe): .score_header{font-size: large; color: #50544A} img{margin-bottom: 1em;} body{font-family: 'Open Sans',Helvetica,Arial,sans-serif} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/afr.recipe b/recipes/afr.recipe index d43cb046cb..fc89323751 100644 --- a/recipes/afr.recipe +++ b/recipes/afr.recipe @@ -8,13 +8,14 @@ def absurl(url): if url.startswith('/'): return 'https://www.afr.com' + url + class afr(BasicNewsRecipe): title = 'Australian Financial Review' __author__ = 'unkn0wn' description = ( 'For more than 65 years The Australian Financial Review has been the authority on business,' ' finance and investment news in Australia. It has a reputation for independent, award-winning ' - 'journalism and is essential reading for Australia\'s business and investor community.' + "journalism and is essential reading for Australia's business and investor community." 
) masthead_url = 'https://www.nineforbrands.com.au/wp-content/uploads/2020/08/AFR-DHOSP-Logo-black-RGB.png' encoding = 'utf-8' @@ -24,7 +25,6 @@ class afr(BasicNewsRecipe): timefmt = ' [%d %b %Y]' max_articles_per_feed = 25 no_stylesheets = True - remove_empty_feeds = True remove_attributes = ['style', 'height', 'width'] keep_only_tags = [ @@ -39,7 +39,7 @@ class afr(BasicNewsRecipe): dict(name=['button', 'aside', 'svg']), ] - remove_tags_after= [ dict(name='aside', attrs={'id':'stickyContainer'})] + remove_tags_after= [dict(name='aside', attrs={'id':'stickyContainer'})] extra_css = ''' #img-cap {font-size:small; text-align:center;} diff --git a/recipes/afrique_21.recipe b/recipes/afrique_21.recipe index df24e4d17c..2c1e5ccbcc 100644 --- a/recipes/afrique_21.recipe +++ b/recipes/afrique_21.recipe @@ -36,9 +36,9 @@ class AfriqueXXIRecipe(BasicNewsRecipe): ''' def default_cover(self, cover_file): - """ + ''' Crée une couverture personnalisée avec le logo - """ + ''' from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data @@ -54,7 +54,7 @@ class AfriqueXXIRecipe(BasicNewsRecipe): weekday = french_weekday[wkd] month = french_month[today.month] - date_str = f"{weekday} {today.day} {month} {today.year}" + date_str = f'{weekday} {today.day} {month} {today.year}' edition = today.strftime('Édition de %Hh') # Image de base diff --git a/recipes/agents.recipe b/recipes/agents.recipe index 43414eff82..096f8dbd39 100644 --- a/recipes/agents.recipe +++ b/recipes/agents.recipe @@ -6,7 +6,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Agents(BasicNewsRecipe): title = u'\u00AB\u0410\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E\u00BB' - description = u'\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u0430\u043D\u043D\u043E\u0435 \u0432\u044B\u0445\u043E\u0434\u0446\u0430\u043C\u0438 \u0438\u0437 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432\u0430 \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u043C\u043E\u0433\u043E \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u041F\u0440\u043E\u0435\u043A\u0442\u00BB.' # noqa + description = u'\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u0430\u043D\u043D\u043E\u0435 \u0432\u044B\u0445\u043E\u0434\u0446\u0430\u043C\u0438 \u0438\u0437 \u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432\u0430 \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u043C\u043E\u0433\u043E \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u041F\u0440\u043E\u0435\u043A\u0442\u00BB.' 
# noqa: E501 __author__ = 'bugmen00t' publisher = 'Project Media' publication_type = 'news' diff --git a/recipes/aktualne.cz.recipe b/recipes/aktualne.cz.recipe index ed1d647b8b..d1a8701769 100644 --- a/recipes/aktualne.cz.recipe +++ b/recipes/aktualne.cz.recipe @@ -32,7 +32,7 @@ class aktualneRecipe(BasicNewsRecipe): remove_attributes = [] remove_tags_before = dict(name='h1', attrs={'class': ['titulek-clanku']}) filter_regexps = [r'img.aktualne.centrum.cz'] - remove_tags = [dict(name='div', attrs={'id': ['social-bookmark']}), + remove_tags = [dict(name='div', attrs={'id': ['social-bookmark']}), dict(name='div', attrs={'class': ['box1', 'svazane-tagy']}), dict(name='div', attrs={'class': 'itemcomment id0'}), dict(name='div', attrs={'class': 'hlavicka'}), diff --git a/recipes/al_jazeera.recipe b/recipes/al_jazeera.recipe index ed7957dccf..0e94bdc5dd 100644 --- a/recipes/al_jazeera.recipe +++ b/recipes/al_jazeera.recipe @@ -21,9 +21,9 @@ class AlJazeera(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - extra_css = """ + extra_css = ''' body{font-family: Arial,sans-serif} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language @@ -55,7 +55,7 @@ class AlJazeera(BasicNewsRecipe): u'http://www.aljazeera.com/xml/rss/all.xml')] def get_article_url(self, article): - artlurl = article.get('link', None) + artlurl = article.get('link', None) return artlurl def preprocess_html(self, soup): diff --git a/recipes/al_masry_alyoum_arabic.recipe b/recipes/al_masry_alyoum_arabic.recipe index b834f1ff62..63920445ed 100644 --- a/recipes/al_masry_alyoum_arabic.recipe +++ b/recipes/al_masry_alyoum_arabic.recipe @@ -22,7 +22,7 @@ class AlMasryAlyoum(BasicNewsRecipe): category = 'News' publication_type = 'newsportal' - extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } ' # noqa + extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } ' # noqa: E501 keep_only_tags = [ dict(name='div', attrs={'class': ['article']}) diff --git a/recipes/al_monitor.recipe b/recipes/al_monitor.recipe index 9c3f214b6b..d57e602275 100644 --- a/recipes/al_monitor.recipe +++ b/recipes/al_monitor.recipe @@ -110,7 +110,7 @@ class AlMonitor(BasicNewsRecipe): title = title[0:120] + '...' 
href = link.get('href') if not href: - self._p("BAD HREF: " + str(link)) + self._p('BAD HREF: ' + str(link)) return self.queue_article_link(section, href, title) @@ -158,7 +158,7 @@ class AlMonitor(BasicNewsRecipe): age = (datetime.datetime.now() - date).days if (age > self.oldest_article): - return "too old" + return 'too old' return False def scrape_article_date(self, soup): @@ -174,7 +174,7 @@ class AlMonitor(BasicNewsRecipe): def date_from_string(self, datestring): try: # eg: Posted September 17, 2014 - dt = datetime.datetime.strptime(datestring, "Posted %B %d, %Y") + dt = datetime.datetime.strptime(datestring, 'Posted %B %d, %Y') except: dt = None @@ -203,11 +203,10 @@ class AlMonitor(BasicNewsRecipe): return self.tag_to_string(n).strip() def _dbg_soup_node(self, node): - s = ' cls: ' + str(node.get('class')).strip() + \ - ' id: ' + str(node.get('id')).strip() + \ - ' role: ' + str(node.get('role')).strip() + \ - ' txt: ' + self.text(node) - return s + return (' cls: ' + str(node.get('class')).strip() + + ' id: ' + str(node.get('id')).strip() + + ' role: ' + str(node.get('role')).strip() + + ' txt: ' + self.text(node)) def _p(self, msg): curframe = inspect.currentframe() diff --git a/recipes/albert_mohler.recipe b/recipes/albert_mohler.recipe index a85063290a..6babd67c0f 100644 --- a/recipes/albert_mohler.recipe +++ b/recipes/albert_mohler.recipe @@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class AlbertMohlersBlog(BasicNewsRecipe): - title = u'Albert Mohler\'s Blog' + title = u"Albert Mohler's Blog" __author__ = 'Peter Grungi' language = 'en' oldest_article = 90 @@ -13,8 +13,7 @@ class AlbertMohlersBlog(BasicNewsRecipe): auto_cleanup = True cover_url = 'http://www.albertmohler.com/wp-content/themes/albert-mohler-v5/img/logo-am-lg.gif' publisher = 'Albert Mohler' - language = 'en' author = 'Albert Mohler' - feeds = [(u'Albert Mohler\'s Blog', + feeds = [(u"Albert Mohler's Blog", u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')] diff --git a/recipes/ald.recipe b/recipes/ald.recipe index e34e6b90ed..88d30aeac0 100644 --- a/recipes/ald.recipe +++ b/recipes/ald.recipe @@ -43,7 +43,7 @@ class ALD(BasicNewsRecipe): # Extract a list of dates from the page. # Subset this out to the list of target dates for extraction. date_list = [] - for div in soup.findAll('div', attrs={'id': "dayheader"}): + for div in soup.findAll('div', attrs={'id': 'dayheader'}): date_list.append(self.tag_to_string(div)) date_list_clean = [re.sub(r'[^\w]', ' ', date) for date in date_list] date_list_bool = [ @@ -54,14 +54,14 @@ class ALD(BasicNewsRecipe): # Process each paragraph one by one. # Stop when the text of the previous div is not in the target date list. 
- for div in soup.findAll('div', attrs={'class': "mobile-front"}): + for div in soup.findAll('div', attrs={'class': 'mobile-front'}): for p in div.findAll('p'): if self.tag_to_string(p.findPreviousSibling('div')) in compress_date: if p.find('a'): title = self.tag_to_string(p) link = p.find('a')['href'] if self.tag_to_string(p.findPreviousSibling('h3') - ) == "Articles of Note": + ) == 'Articles of Note': articles_note.append({ 'title': title, 'url': link, @@ -69,7 +69,7 @@ class ALD(BasicNewsRecipe): 'date': '' }) elif self.tag_to_string(p.findPreviousSibling('h3') - ) == "New Books": + ) == 'New Books': new_books.append({ 'title': title, 'url': link, diff --git a/recipes/alejakomiksu_com.recipe b/recipes/alejakomiksu_com.recipe index 2f22bbffaf..c08a5ada5f 100644 --- a/recipes/alejakomiksu_com.recipe +++ b/recipes/alejakomiksu_com.recipe @@ -29,6 +29,6 @@ class AlejaKomiksu(BasicNewsRecipe): def skip_ad_pages(self, soup): tag = soup.find(attrs={'class': 'rodzaj'}) if tag and tag.a.string.lower().strip() == 'recenzje': - link = soup.find(text=re.compile('recenzuje')) + link = soup.find(text=re.compile(r'recenzuje')) if link: return self.index_to_soup(link.parent['href'], raw=True) diff --git a/recipes/alternatives_economiques.recipe b/recipes/alternatives_economiques.recipe index 4845283694..5d5cda5911 100644 --- a/recipes/alternatives_economiques.recipe +++ b/recipes/alternatives_economiques.recipe @@ -21,7 +21,7 @@ class AlternativesEconomiques(BasicNewsRecipe): remove_images = False def get_cover_url(self): - """Récupère dynamiquement l'URL de la dernière une depuis MLP""" + '''Récupère dynamiquement l'URL de la dernière une depuis MLP''' br = self.get_browser() try: # Accéder à la page du magazine sur MLP @@ -38,7 +38,7 @@ class AlternativesEconomiques(BasicNewsRecipe): self.log('Cover URL found:', cover_url) return cover_url - self.log('Aucune couverture trouvée, utilisation de l\'image par défaut') + self.log("Aucune couverture trouvée, utilisation de l'image par défaut") return 'https://www.alternatives-economiques.fr/sites/all/themes/alternatives-economiques-main/assets/logo-alternatives-economiques.svg' except Exception as e: @@ -92,7 +92,7 @@ class AlternativesEconomiques(BasicNewsRecipe): display_name = section_name.replace('-', ' ').title() articles.append((display_name, feed_articles[:self.max_articles_per_feed])) except Exception as e: - self.log.error(f'Error processing {section_name}: {str(e)}') + self.log.error(f'Error processing {section_name}: {e}') continue return articles @@ -133,7 +133,7 @@ class AlternativesEconomiques(BasicNewsRecipe): 'description': '' }) except Exception as e: - self.log.error(f'Error getting H1 title for {article_url}: {str(e)}') + self.log.error(f'Error getting H1 title for {article_url}: {e}') continue return feed_articles diff --git a/recipes/altomdata_dk.recipe b/recipes/altomdata_dk.recipe index c0e76b1fdd..3dbdd41a0d 100644 --- a/recipes/altomdata_dk.recipe +++ b/recipes/altomdata_dk.recipe @@ -21,11 +21,9 @@ class WwwAltomdata_dk(BasicNewsRecipe): resolve_internal_links = True remove_empty_feeds = True auto_cleanup = True - language = 'da' feeds = [ ('Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/feed'), ('Kommentarer til Alt om DATA, Datatid TechLife', 'http://www.altomdata.dk/comments/feed'), ] - diff --git a/recipes/am730.recipe b/recipes/am730.recipe index 38d4a5ea9b..d5114f119e 100644 --- a/recipes/am730.recipe +++ b/recipes/am730.recipe @@ -34,7 +34,7 @@ class AM730(BasicNewsRecipe): description = 'http://www.am730.com.hk' 
category = 'Chinese, News, Hong Kong' masthead_url = 'https://upload.wikimedia.org/wikipedia/en/5/58/Am730_Hong_Kong_newspaper_logo.png' - extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa: E501 remove_tags =[dict(name='div',attrs={'class':'col-xs-12 col-sm-1 col-md-1 share-button'}), dict(name='div',attrs={'class':'logo-container print-logo'}), dict(name='div',attrs={'id':'galleria'})] @@ -53,12 +53,12 @@ class AM730(BasicNewsRecipe): return self.masthead_url def getAMSectionArticles(self, sectionName,url): - # print sectionName + # print(sectionName) soup = self.index_to_soup(url) articles = [] for aTag in soup.findAll('a',attrs={'class':'newsimglink'}): href = aTag.get('href',False) - if not href.encode("utf-8").startswith(url.encode("utf-8")) : + if not href.encode('utf-8').startswith(url.encode('utf-8')): continue # not in same section title = href.split('/')[-1].split('-')[0] @@ -67,7 +67,7 @@ class AM730(BasicNewsRecipe): print(title) try: if articles.index({'title':title,'url':href})>=0: - # print 'already added' + # print('already added') continue # already added except: pass @@ -78,7 +78,7 @@ class AM730(BasicNewsRecipe): break if self.debug: print(articles) - return (sectionName,articles) + return sectionName, articles def parse_index(self): # hard code sections @@ -89,12 +89,12 @@ class AM730(BasicNewsRecipe): ('體育','https://www.am730.com.hk/news/%E9%AB%94%E8%82%B2'), ('娛樂','https://www.am730.com.hk/news/%E5%A8%9B%E6%A8%82'), ('旅遊.飲食','https://www.am730.com.hk/news/%E6%97%85%E9%81%8A.%E9%A3%B2%E9%A3%9F') - ] # articles =[] + ] # articles =[] SectionsArticles=[] - for (title, url) in Sections: + for title, url in Sections: if self.debug: print(title) print(url) SectionsArticles.append(self.getAMSectionArticles(title,url)) -# feeds.append(articles[0]['url']) + # feeds.append(articles[0]['url']) return SectionsArticles diff --git a/recipes/ambito.recipe b/recipes/ambito.recipe index 7a5a177cc6..ad83cc6605 100644 --- a/recipes/ambito.recipe +++ b/recipes/ambito.recipe @@ -28,9 +28,9 @@ class Ambito(BasicNewsRecipe): language = 'es_AR' publication_type = 'newsportal' masthead_url = 'https://www.ambito.com/css-custom/239/images/logo-239-2020v2.svg' - extra_css = """ + extra_css = ''' body{font-family: Roboto, sans-serif} - """ + ''' conversion_options = { 'comment': description, diff --git a/recipes/american_thinker.recipe b/recipes/american_thinker.recipe index 9ad7d0f7ec..b054407f86 100644 --- a/recipes/american_thinker.recipe +++ b/recipes/american_thinker.recipe @@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class AmericanThinker(BasicNewsRecipe): title = u'American Thinker' - description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans." + description = 'American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans.' 
__author__ = 'Walt Anthony' publisher = 'Thomas Lifson' category = 'news, politics, USA' @@ -33,7 +33,7 @@ class AmericanThinker(BasicNewsRecipe): root = html5lib.parse( clean_xml_chars(raw), treebuilder='lxml', namespaceHTMLElements=False) - for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa + for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa: E501 x.getparent().remove(x) return etree.tostring(root, encoding='unicode') diff --git a/recipes/anandtech.recipe b/recipes/anandtech.recipe index aa29ed443c..8ca0011757 100644 --- a/recipes/anandtech.recipe +++ b/recipes/anandtech.recipe @@ -39,4 +39,4 @@ class anan(BasicNewsRecipe): def print_version(self, url): # return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update - return url.replace("/show/", "/print/") # 2014-02-27 AGE: update + return url.replace('/show/', '/print/') # 2014-02-27 AGE: update diff --git a/recipes/ancient_egypt.recipe b/recipes/ancient_egypt.recipe index c40b0aa3cc..d88a181ea3 100644 --- a/recipes/ancient_egypt.recipe +++ b/recipes/ancient_egypt.recipe @@ -12,7 +12,7 @@ class ancientegypt(BasicNewsRecipe): language = 'en' __author__ = 'unkn0wn' description = ( - 'Ancient Egypt is the world\'s leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. ' + "Ancient Egypt is the world's leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. " 'Now in a larger format with a fresh new design, AE brings you the latest news and discoveries, and feature articles covering ' 'more than 5000 years of Egyptian history. Published bimonthly.' 
) diff --git a/recipes/andhrajyothy_ap.recipe b/recipes/andhrajyothy_ap.recipe index 041098c8ca..86a122fb6c 100644 --- a/recipes/andhrajyothy_ap.recipe +++ b/recipes/andhrajyothy_ap.recipe @@ -5,7 +5,7 @@ from datetime import date from calibre.web.feeds.news import BasicNewsRecipe # figure out your local edition id from the log of this recipe -edi_id = 182 # NTR VIJAYAWADA - 182 +edi_id = 182 # NTR VIJAYAWADA - 182 today = date.today().strftime('%d/%m/%Y') @@ -18,6 +18,7 @@ today = today.replace('/', '%2F') index = 'https://epaper.andhrajyothy.com' + class andhra(BasicNewsRecipe): title = 'ఆంధ్రజ్యోతి - ఆంధ్రప్రదేశ్' language = 'te' @@ -50,7 +51,7 @@ class andhra(BasicNewsRecipe): self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n') for edi in edi_data: if edi['org_location'] in {'Magazines', 'Navya Daily'}: - continue + continue self.log(edi['org_location']) cities = [] for edi_loc in edi['editionlocation']: @@ -75,8 +76,8 @@ class andhra(BasicNewsRecipe): url = str(snaps['OrgId']) if snaps['ObjectType'] == 4: continue - feeds_dict[section].append({"title": '', "url": url}) - return [(section, articles) for section, articles in feeds_dict.items()] + feeds_dict[section].append({'title': '', 'url': url}) + return list(feeds_dict.items()) def preprocess_raw_html(self, raw, *a): data = json.loads(raw) diff --git a/recipes/andhrajyothy_tel.recipe b/recipes/andhrajyothy_tel.recipe index 37cba5ebfb..eb1abb8dfd 100644 --- a/recipes/andhrajyothy_tel.recipe +++ b/recipes/andhrajyothy_tel.recipe @@ -5,7 +5,7 @@ from datetime import date from calibre.web.feeds.news import BasicNewsRecipe # figure out your local edition id from the log of this recipe -edi_id = 34 # HYDERABAD MAIN I - 34 +edi_id = 34 # HYDERABAD MAIN I - 34 today = date.today().strftime('%d/%m/%Y') @@ -18,6 +18,7 @@ today = today.replace('/', '%2F') index = 'https://epaper.andhrajyothy.com' + class andhra(BasicNewsRecipe): title = 'ఆంధ్రజ్యోతి - తెలంగాణ' language = 'te' @@ -50,7 +51,7 @@ class andhra(BasicNewsRecipe): self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n') for edi in edi_data: if edi['org_location'] in {'Magazines', 'Navya Daily'}: - continue + continue self.log(edi['org_location']) cities = [] for edi_loc in edi['editionlocation']: @@ -75,8 +76,8 @@ class andhra(BasicNewsRecipe): url = str(snaps['OrgId']) if snaps['ObjectType'] == 4: continue - feeds_dict[section].append({"title": '', "url": url}) - return [(section, articles) for section, articles in feeds_dict.items()] + feeds_dict[section].append({'title': '', 'url': url}) + return list(feeds_dict.items()) def preprocess_raw_html(self, raw, *a): data = json.loads(raw) diff --git a/recipes/android_com_pl.recipe b/recipes/android_com_pl.recipe index c62f1ceef4..6265f25011 100644 --- a/recipes/android_com_pl.recipe +++ b/recipes/android_com_pl.recipe @@ -16,5 +16,5 @@ class Android_com_pl(BasicNewsRecipe): remove_tags_after = [{'class': 'post-content'}] remove_tags = [dict(name='ul', attrs={'class': 'tags small-tags'}), dict(name='a', attrs={'onclick': 'return ss_plugin_loadpopup_js(this);'})] preprocess_regexps = [ - (re.compile(u'
.{,1}', re.DOTALL), lambda match: '')] +        (re.compile(r'.{,1}
', re.DOTALL), lambda match: '')] feeds = [(u'Android', u'http://android.com.pl/feed/')] diff --git a/recipes/android_police.recipe b/recipes/android_police.recipe index a3b9a4114f..dd9b304600 100644 --- a/recipes/android_police.recipe +++ b/recipes/android_police.recipe @@ -23,36 +23,36 @@ class AdvancedUserRecipe1718384518(BasicNewsRecipe): auto_cleanup = True feeds = [ - #Phones + # Phones ('Phones', 'https://www.androidpolice.com/feed/phones/'), ('News about Phones', 'https://www.androidpolice.com/feed/phones-news/'), ('Guides about Phones', 'https://www.androidpolice.com/feed/phones-guide/'), ('Phones Features', 'https://www.androidpolice.com/feed/phones-features/'), ('Phones & Accessory Reviews', 'https://www.androidpolice.com/feed/phones-reviews/'), - #Google + # Google ('Google', 'https://www.androidpolice.com/feed/google/'), ('News about Google', 'https://www.androidpolice.com/feed/news-google/'), ('Google Applications', 'https://www.androidpolice.com/feed/tag/google-app/'), ('Guides about Google', 'https://www.androidpolice.com/feed/guides-google/'), ('Features about Google', 'https://www.androidpolice.com/feed/features-google/'), - #Operating Systems + # Operating Systems ('Operating Systems', 'https://www.androidpolice.com/feed/operating-systems/'), ('News about Operating Systems', 'https://www.androidpolice.com/feed/news-operating-systems/'), ('Guides about Operating Systems', 'https://www.androidpolice.com/feed/guides-operating-systems/'), ('Features on Operating Systems', 'https://www.androidpolice.com/feed/features-operating-systems/'), - #Chromebooks + # Chromebooks ('Chromebooks', 'https://www.androidpolice.com/feed/laptops/'), ('News about Chromebooks', 'https://www.androidpolice.com/feed/news-chromebooks/'), ('Guides about Chromebooks', 'https://www.androidpolice.com/feed/guides-chromebooks/'), ('Chromebook & Laptop Reviews', 'https://www.androidpolice.com/feed/reviews-chromebooks/'), - #Gadgets + # Gadgets ('Gadgets', 'https://www.androidpolice.com/feed/gadgets/'), ('Smartwatches & Wearables', 'https://www.androidpolice.com/feed/wearables/'), ('Audio', 'https://www.androidpolice.com/feed/tag/audio/'), ('Accessories', 'https://www.androidpolice.com/feed/accessories/'), ('Smart Home', 'https://www.androidpolice.com/feed/smart-home/'), ('Applications & Games', 'https://www.androidpolice.com/feed/applications-games/'), - #Reviews + # Reviews ('Reviews', 'https://www.androidpolice.com/feed/reviews/'), ('Phones & Accessory Reviews', 'https://www.androidpolice.com/feed/phones-reviews/'), ('Smartwatch & Wearable Reviews', 'https://www.androidpolice.com/feed/wearable-reviews/'), diff --git a/recipes/animal_politico.recipe b/recipes/animal_politico.recipe index 5d23429f2a..c356126d64 100644 --- a/recipes/animal_politico.recipe +++ b/recipes/animal_politico.recipe @@ -32,14 +32,11 @@ class AnimalPolitico(BasicNewsRecipe): def parse_index(self): soup = self.index_to_soup('http://www.animalpolitico.com/') articles = [] - for a in soup(**{ - 'name': 'a', - 'attrs': { + for a in soup(name='a', attrs={ 'href': True, 'title': True, 'data-author': True, 'data-type': True, 'data-home-title': True - } - }): + }): title = a['title'] url = a['href'] author = a['data-author'] diff --git a/recipes/aprospect.recipe b/recipes/aprospect.recipe index de904a2dca..5de1cefe7d 100644 --- a/recipes/aprospect.recipe +++ b/recipes/aprospect.recipe @@ -19,8 +19,6 @@ class AmericanProspect(BasicNewsRecipe): use_embedded_content = False - no_stylesheets = True - keep_only_tags = [ dict(id=['title', 
'content']), ] diff --git a/recipes/arbetaren.recipe b/recipes/arbetaren.recipe index e22ed5e23f..7e34597917 100644 --- a/recipes/arbetaren.recipe +++ b/recipes/arbetaren.recipe @@ -18,8 +18,6 @@ class Arbetaren_SE(BasicNewsRecipe): encoding = 'utf-8' language = 'sv' auto_cleanup = True - auto_cleanup_keep = '//div[@class="thumbnail"]' - auto_cleanup_keep = '//div[@id="article-image"]' - auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]' + auto_cleanup_keep = '//div[@class="thumbnail"]|//div[@id="article-image"]|//span[@class="important"]' feeds = [(u'Nyheter', u'https://www.arbetaren.se/feed')] diff --git a/recipes/arcamax.recipe b/recipes/arcamax.recipe index c255442669..aef1ca6a26 100644 --- a/recipes/arcamax.recipe +++ b/recipes/arcamax.recipe @@ -66,19 +66,19 @@ class Arcamax(BasicNewsRecipe): # (u"9 Chickweed Lane", u"https://www.arcamax.com/thefunnies/ninechickweedlane"), # (u"Agnes", u"https://www.arcamax.com/thefunnies/agnes"), # (u"Andy Capp", u"https://www.arcamax.com/thefunnies/andycapp"), - (u"BC", u"https://www.arcamax.com/thefunnies/bc"), + (u'BC', u'https://www.arcamax.com/thefunnies/bc'), # (u"Baby Blues", u"https://www.arcamax.com/thefunnies/babyblues"), # (u"Beetle Bailey", u"https://www.arcamax.com/thefunnies/beetlebailey"), - (u"Blondie", u"https://www.arcamax.com/thefunnies/blondie"), + (u'Blondie', u'https://www.arcamax.com/thefunnies/blondie'), # u"Boondocks", u"https://www.arcamax.com/thefunnies/boondocks"), # (u"Cathy", u"https://www.arcamax.com/thefunnies/cathy"), # (u"Daddys Home", u"https://www.arcamax.com/thefunnies/daddyshome"), # (u"Dinette Set", u"https://www.arcamax.com/thefunnies/thedinetteset"), - (u"Dog Eat Doug", u"https://www.arcamax.com/thefunnies/dogeatdoug"), + (u'Dog Eat Doug', u'https://www.arcamax.com/thefunnies/dogeatdoug'), # (u"Doonesbury", u"https://www.arcamax.com/thefunnies/doonesbury"), # (u"Dustin", u"https://www.arcamax.com/thefunnies/dustin"), - (u"Family Circus", u"https://www.arcamax.com/thefunnies/familycircus"), - (u"Garfield", u"https://www.arcamax.com/thefunnies/garfield"), + (u'Family Circus', u'https://www.arcamax.com/thefunnies/familycircus'), + (u'Garfield', u'https://www.arcamax.com/thefunnies/garfield'), # (u"Get Fuzzy", u"https://www.arcamax.com/thefunnies/getfuzzy"), # (u"Girls and Sports", u"https://www.arcamax.com/thefunnies/girlsandsports"), # (u"Hagar the Horrible", u"https://www.arcamax.com/thefunnies/hagarthehorrible"), @@ -87,16 +87,16 @@ class Arcamax(BasicNewsRecipe): # (u"Luann", u"https://www.arcamax.com/thefunnies/luann"), # (u"Momma", u"https://www.arcamax.com/thefunnies/momma"), # (u"Mother Goose and Grimm", u"https://www.arcamax.com/thefunnies/mothergooseandgrimm"), - (u"Mutts", u"https://www.arcamax.com/thefunnies/mutts"), + (u'Mutts', u'https://www.arcamax.com/thefunnies/mutts'), # (u"Non Sequitur", u"https://www.arcamax.com/thefunnies/nonsequitur"), # (u"Pearls Before Swine", u"https://www.arcamax.com/thefunnies/pearlsbeforeswine"), # (u"Pickles", u"https://www.arcamax.com/thefunnies/pickles"), # (u"Red and Rover", u"https://www.arcamax.com/thefunnies/redandrover"), # (u"Rubes", u"https://www.arcamax.com/thefunnies/rubes"), # (u"Rugrats", u"https://www.arcamax.com/thefunnies/rugrats"), - (u"Speed Bump", u"https://www.arcamax.com/thefunnies/speedbump"), - (u"Wizard of Id", u"https://www.arcamax.com/thefunnies/wizardofid"), - (u"Zits", u"https://www.arcamax.com/thefunnies/zits"), + (u'Speed Bump', u'https://www.arcamax.com/thefunnies/speedbump'), + (u'Wizard of Id', 
u'https://www.arcamax.com/thefunnies/wizardofid'), + (u'Zits', u'https://www.arcamax.com/thefunnies/zits'), ]: self.log('Finding strips for:', title) articles = self.make_links(url, title) diff --git a/recipes/arret_sur_images.recipe b/recipes/arret_sur_images.recipe index d4b3520430..b4c0ba79c4 100644 --- a/recipes/arret_sur_images.recipe +++ b/recipes/arret_sur_images.recipe @@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class ArretSurImages(BasicNewsRecipe): title = 'Arrêt sur Images' - description = 'Site français d\'analyse des médias' + description = "Site français d'analyse des médias" language = 'fr' encoding = 'utf-8' needs_subscription = True @@ -27,9 +27,9 @@ class ArretSurImages(BasicNewsRecipe): ] def default_cover(self, cover_file): - """ + ''' Crée une couverture personnalisée avec le logo ASI - """ + ''' from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data @@ -45,7 +45,7 @@ class ArretSurImages(BasicNewsRecipe): weekday = french_weekday[wkd] month = french_month[today.month] - date_str = f"{weekday} {today.day} {month} {today.year}" + date_str = f'{weekday} {today.day} {month} {today.year}' edition = today.strftime('Édition de %Hh') img = QImage(1400, 1920, QImage.Format_RGB888) @@ -123,9 +123,9 @@ class ArretSurImages(BasicNewsRecipe): br.addheaders += [('Authorization', f'Bearer {auth_response["access_token"]}')] print('Authentification réussie') else: - print('Échec de l\'authentification - Vérifiez vos identifiants') + print("Échec de l'authentification - Vérifiez vos identifiants") except Exception as e: - print(f'Erreur lors de l\'authentification: {str(e)}') + print(f"Erreur lors de l'authentification: {e}") return br def get_article_url(self, article): @@ -162,7 +162,7 @@ class ArretSurImages(BasicNewsRecipe): ''' except Exception as e: - print(f'Erreur preprocessing HTML: {str(e)}') + print(f'Erreur preprocessing HTML: {e}') return raw_html def preprocess_html(self, soup): @@ -186,11 +186,11 @@ class ArretSurImages(BasicNewsRecipe): else: tag.replace_with(img_tag) except Exception as e: - print(f'Erreur processing image: {str(e)}') + print(f'Erreur processing image: {e}') tag.decompose() else: tag.decompose() return soup except Exception as e: - print(f'Erreur preprocessing HTML: {str(e)}') + print(f'Erreur preprocessing HTML: {e}') return soup diff --git a/recipes/asahi_shimbun_en.recipe b/recipes/asahi_shimbun_en.recipe index 7cdbfbbc8b..5f539dc5b7 100644 --- a/recipes/asahi_shimbun_en.recipe +++ b/recipes/asahi_shimbun_en.recipe @@ -1,12 +1,12 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -__license__ = "GPL v3" -__copyright__ = "2022, Albert Aparicio Isarn " +__license__ = 'GPL v3' +__copyright__ = '2022, Albert Aparicio Isarn ' -""" +''' https://www.asahi.com/ajw/ -""" +''' from datetime import datetime @@ -14,99 +14,99 @@ from calibre.web.feeds.news import BasicNewsRecipe class AsahiShimbunEnglishNews(BasicNewsRecipe): - title = "The Asahi Shimbun" - __author__ = "Albert Aparicio Isarn" + title = 'The Asahi Shimbun' + __author__ = 'Albert Aparicio Isarn' - description = ("The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan." 
- " The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive" - " coverage of cool Japan,focusing on manga, travel and other timely news.") - publisher = "The Asahi Shimbun Company" - publication_type = "newspaper" - category = "news, japan" - language = "en_JP" + description = ('The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan.' + ' The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive' + ' coverage of cool Japan,focusing on manga, travel and other timely news.') + publisher = 'The Asahi Shimbun Company' + publication_type = 'newspaper' + category = 'news, japan' + language = 'en_JP' - index = "https://www.asahi.com" - masthead_url = "https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png" + index = 'https://www.asahi.com' + masthead_url = 'https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png' oldest_article = 3 max_articles_per_feed = 40 no_stylesheets = True remove_javascript = True - remove_tags_before = {"id": "MainInner"} - remove_tags_after = {"class": "ArticleText"} - remove_tags = [{"name": "div", "class": "SnsUtilityArea"}] + remove_tags_before = {'id': 'MainInner'} + remove_tags_after = {'class': 'ArticleText'} + remove_tags = [{'name': 'div', 'class': 'SnsUtilityArea'}] def get_whats_new(self): - soup = self.index_to_soup(self.index + "/ajw/new") - news_section = soup.find("div", attrs={"class": "specialList"}) + soup = self.index_to_soup(self.index + '/ajw/new') + news_section = soup.find('div', attrs={'class': 'specialList'}) new_news = [] - for item in news_section.findAll("li"): - title = item.find("p", attrs={"class": "title"}).string - date_string = item.find("p", attrs={"class": "date"}).next + for item in news_section.findAll('li'): + title = item.find('p', attrs={'class': 'title'}).string + date_string = item.find('p', attrs={'class': 'date'}).next date = date_string.strip() - url = self.index + item.find("a")["href"] + url = self.index + item.find('a')['href'] new_news.append( { - "title": title, - "date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"), - "url": url, - "description": "", + 'title': title, + 'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'), + 'url': url, + 'description': '', } ) return new_news def get_top6(self, soup): - top = soup.find("ul", attrs={"class": "top6"}) + top = soup.find('ul', attrs={'class': 'top6'}) top6_news = [] - for item in top.findAll("li"): - title = item.find("p", attrs={"class": "title"}).string - date_string = item.find("p", attrs={"class": "date"}).next + for item in top.findAll('li'): + title = item.find('p', attrs={'class': 'title'}).string + date_string = item.find('p', attrs={'class': 'date'}).next date = date_string.strip() - url = self.index + item.find("a")["href"] + url = self.index + item.find('a')['href'] top6_news.append( { - "title": title, - "date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"), - "url": url, - "description": "", + 'title': title, + 'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'), + 'url': url, + 'description': '', } ) return top6_news def get_section_news(self, soup): - news_grid = soup.find("ul", attrs={"class": "default"}) + news_grid = soup.find('ul', attrs={'class': 'default'}) news = [] - for item in news_grid.findAll("li"): - title = item.find("p", attrs={"class": "title"}).string - date_string = item.find("p", attrs={"class": "date"}).next + for item in news_grid.findAll('li'): + 
title = item.find('p', attrs={'class': 'title'}).string + date_string = item.find('p', attrs={'class': 'date'}).next date = date_string.strip() - url = self.index + item.find("a")["href"] + url = self.index + item.find('a')['href'] news.append( { - "title": title, - "date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"), - "url": url, - "description": "", + 'title': title, + 'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'), + 'url': url, + 'description': '', } ) return news def get_section(self, section): - soup = self.index_to_soup(self.index + "/ajw/" + section) + soup = self.index_to_soup(self.index + '/ajw/' + section) section_news_items = self.get_top6(soup) section_news_items.extend(self.get_section_news(soup)) @@ -114,26 +114,26 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe): return section_news_items def get_special_section(self, section): - soup = self.index_to_soup(self.index + "/ajw/" + section) - top = soup.find("div", attrs={"class": "Section"}) + soup = self.index_to_soup(self.index + '/ajw/' + section) + top = soup.find('div', attrs={'class': 'Section'}) special_news = [] - for item in top.findAll("li"): - item_a = item.find("a") + for item in top.findAll('li'): + item_a = item.find('a') - text_split = item_a.text.strip().split("\n") + text_split = item_a.text.strip().split('\n') title = text_split[0] description = text_split[1].strip() - url = self.index + item_a["href"] + url = self.index + item_a['href'] special_news.append( { - "title": title, - "date": "", - "url": url, - "description": description, + 'title': title, + 'date': '', + 'url': url, + 'description': description, } ) @@ -144,24 +144,24 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe): feeds = [ ("What's New", self.get_whats_new()), - ("National Report", self.get_section("national_report")), - ("Politics", self.get_section("politics")), - ("Business", self.get_section("business")), - ("Asia & World - China", self.get_section("asia_world/china")), - ("Asia & World - Korean Peninsula", self.get_section("asia_world/korean_peninsula")), - ("Asia & World - Around Asia", self.get_section("asia_world/around_asia")), - ("Asia & World - World", self.get_section("asia_world/world")), - ("Sci & Tech", self.get_section("sci_tech")), - ("Culture - Style", self.get_section("culture/style")), + ('National Report', self.get_section('national_report')), + ('Politics', self.get_section('politics')), + ('Business', self.get_section('business')), + ('Asia & World - China', self.get_section('asia_world/china')), + ('Asia & World - Korean Peninsula', self.get_section('asia_world/korean_peninsula')), + ('Asia & World - Around Asia', self.get_section('asia_world/around_asia')), + ('Asia & World - World', self.get_section('asia_world/world')), + ('Sci & Tech', self.get_section('sci_tech')), + ('Culture - Style', self.get_section('culture/style')), # ("Culture - Cooking", self.get_section("culture/cooking")), - ("Culture - Movies", self.get_section("culture/movies")), - ("Culture - Manga & Anime", self.get_section("culture/manga_anime")), - ("Travel", self.get_section("travel")), - ("Sports", self.get_section("sports")), - ("Opinion - Editorial", self.get_section("opinion/editorial")), - ("Opinion - Vox Populi", self.get_section("opinion/vox")), - ("Opinion - Views", self.get_section("opinion/views")), - ("Special", self.get_special_section("special")), + ('Culture - Movies', self.get_section('culture/movies')), + ('Culture - Manga & Anime', self.get_section('culture/manga_anime')), + ('Travel', 
self.get_section('travel')), + ('Sports', self.get_section('sports')), + ('Opinion - Editorial', self.get_section('opinion/editorial')), + ('Opinion - Vox Populi', self.get_section('opinion/vox')), + ('Opinion - Views', self.get_section('opinion/views')), + ('Special', self.get_special_section('special')), ] return feeds diff --git a/recipes/asianreviewofbooks.recipe b/recipes/asianreviewofbooks.recipe index f9e463b545..3a3565c9a3 100644 --- a/recipes/asianreviewofbooks.recipe +++ b/recipes/asianreviewofbooks.recipe @@ -14,7 +14,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class AsianReviewOfBooks(BasicNewsRecipe): title = 'The Asian Review of Books' __author__ = 'Darko Miletic' - description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.' # noqa + description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.' # noqa: E501 publisher = 'The Asian Review of Books' category = 'literature, books, reviews, Asia' oldest_article = 30 @@ -26,11 +26,11 @@ class AsianReviewOfBooks(BasicNewsRecipe): publication_type = 'magazine' auto_cleanup = True masthead_url = 'https://i2.wp.com/asianreviewofbooks.com/content/wp-content/uploads/2016/09/ARBwidelogo.png' - extra_css = """ + extra_css = ''' body{font-family: "Droid Serif", serif} .entry-title {font-family: "Playfair Display", serif} img {display: block} - """ + ''' recipe_specific_options = { 'days': { diff --git a/recipes/astro_news_pl.recipe b/recipes/astro_news_pl.recipe index 8f9911dcd8..14162aae85 100644 --- a/recipes/astro_news_pl.recipe +++ b/recipes/astro_news_pl.recipe @@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class AstroNEWS(BasicNewsRecipe): title = u'AstroNEWS' __author__ = 'fenuks' - description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' # noqa + description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' 
# noqa: E501 category = 'astronomy, science' language = 'pl' oldest_article = 8 diff --git a/recipes/atlantic.recipe b/recipes/atlantic.recipe index a8a0d7565b..51b5fda9b5 100644 --- a/recipes/atlantic.recipe +++ b/recipes/atlantic.recipe @@ -12,7 +12,7 @@ test_article = None # test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed' -# {{{ parse article JSON +# parse article JSON {{{ def process_image_block(lines, block): caption = block.get('captionText') caption_lines = [] diff --git a/recipes/atlantic_com.recipe b/recipes/atlantic_com.recipe index f8a96645bc..496ef3277a 100644 --- a/recipes/atlantic_com.recipe +++ b/recipes/atlantic_com.recipe @@ -12,7 +12,7 @@ test_article = None # test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed' -# {{{ parse article JSON +# parse article JSON {{{ def process_image_block(lines, block): caption = block.get('captionText') caption_lines = [] diff --git a/recipes/attac_es.recipe b/recipes/attac_es.recipe index 6b91dece96..d6882eeaf1 100644 --- a/recipes/attac_es.recipe +++ b/recipes/attac_es.recipe @@ -11,7 +11,7 @@ class AttacEspanaRecipe (BasicNewsRecipe): __license__ = 'GPL v3' __copyright__ = '2012, Marc Busqué ' title = u'attac.es' - description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' # noqa + description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' 
# noqa: E501 url = 'http://www.attac.es' language = 'es' tags = 'contrainformación, información alternativa' diff --git a/recipes/avisen_dk.recipe b/recipes/avisen_dk.recipe index 15d8033f24..2b0aa9f8ed 100644 --- a/recipes/avisen_dk.recipe +++ b/recipes/avisen_dk.recipe @@ -24,4 +24,3 @@ class WwwAvisen_dk(BasicNewsRecipe): feeds = [ ('Nyheder fra Avisen.dk', 'http://www.avisen.dk/rss.aspx'), ] - diff --git a/recipes/ba_herald.recipe b/recipes/ba_herald.recipe index 91cac54aa8..40758a84e7 100644 --- a/recipes/ba_herald.recipe +++ b/recipes/ba_herald.recipe @@ -24,12 +24,12 @@ class BuenosAiresHerald(BasicNewsRecipe): publication_type = 'newspaper' masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg' INDEX = 'http://www.buenosairesherald.com' - extra_css = """ + extra_css = ''' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em; display:block} h1{font-family: Georgia,serif} #fecha{text-align: right; font-size: small} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/backyard_boss.recipe b/recipes/backyard_boss.recipe index 783fd11183..2626a3baec 100644 --- a/recipes/backyard_boss.recipe +++ b/recipes/backyard_boss.recipe @@ -22,21 +22,21 @@ class AdvancedUserRecipe1718382046(BasicNewsRecipe): auto_cleanup = True feeds = [ - #Gardening + # Gardening ('Gardening', 'https://www.backyardboss.net/feed/category/gardening/'), ('Outdoor Gardening', 'https://www.backyardboss.net/feed/category/gardening/outdoor-gardening/'), ('Indoor Gardening', 'https://www.backyardboss.net/feed/category/gardening/indoor-gardening/'), ('Fruits & Vegetables', 'https://www.backyardboss.net/feed/tag/gardening/fruits-and-vegetables/'), ('Houseplants', 'https://www.backyardboss.net/feed/category/gardening/houseplants/'), ('Plant Care', 'https://www.backyardboss.net/feed/category/gardening/plant-care/'), - #Backyard + # Backyard ('Backyard', 'https://www.backyardboss.net/feed/category/backyard/'), ('Home Improvement', 'https://www.backyardboss.net/feed/category/backyard/home-improvement/'), ('Lawn Care', 'https://www.backyardboss.net/feed/category/backyard/lawn-care/'), ('Landscaping', 'https://www.backyardboss.net/feed/category/backyard/landscape-industry/'), ('Barbecue', 'https://www.backyardboss.net/feed/category/backyard/bbq/'), ('Reviews', 'https://www.backyardboss.net/feed/category/backyard/reviews/'), - #DIY & Project + # DIY & Project ('DIY & Projects', 'https://www.backyardboss.net/feed/category/diy/'), ('How-To', 'https://www.backyardboss.net/feed/category/diy/how-to/'), ('Designs & Ideas', 'https://www.backyardboss.net/feed/category/diy/designs-and-ideas/'), diff --git a/recipes/baikaljournal.recipe b/recipes/baikaljournal.recipe index 58d4e86d07..c134c2f024 100644 --- a/recipes/baikaljournal.recipe +++ b/recipes/baikaljournal.recipe @@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class BaikalJournal(BasicNewsRecipe): title = '\u041B\u044E\u0434\u0438 \u0411\u0430\u0439\u043A\u0430\u043B\u0430' __author__ = 'bugmen00t' - description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u044B\u0439 \u0430\u0432\u0442\u043E\u0440\u0441\u043A\u0438\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0436\u0443\u0440\u043D\u0430\u043B \u043E \u0436\u0438\u0437\u043D\u0438 \u0432 \u0421\u0438\u0431\u0438\u0440\u0438 \u0438 \u0432\u043E\u043A\u0440\u0443\u0433 \u0411\u0430\u0439\u043A\u0430\u043B\u0430, 
\u043E\u043F\u0438\u0441\u044B\u0432\u0430\u044E\u0449\u0438\u0439 \u0436\u0438\u0437\u043D\u044C \u0432 \u0441\u0438\u0431\u0438\u0440\u0441\u043A\u043E\u0439 \u0433\u043B\u0443\u0431\u0438\u043D\u043A\u0435.' # noqa - publisher = '\u041E\u043B\u044C\u0433\u0430 \u041C\u0443\u0442\u043E\u0432\u0438\u043D\u0430, \u0415\u043B\u0435\u043D\u0430 \u0422\u0440\u0438\u0444\u043E\u043D\u043E\u0432\u0430' # noqa + description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u044B\u0439 \u0430\u0432\u0442\u043E\u0440\u0441\u043A\u0438\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0436\u0443\u0440\u043D\u0430\u043B \u043E \u0436\u0438\u0437\u043D\u0438 \u0432 \u0421\u0438\u0431\u0438\u0440\u0438 \u0438 \u0432\u043E\u043A\u0440\u0443\u0433 \u0411\u0430\u0439\u043A\u0430\u043B\u0430, \u043E\u043F\u0438\u0441\u044B\u0432\u0430\u044E\u0449\u0438\u0439 \u0436\u0438\u0437\u043D\u044C \u0432 \u0441\u0438\u0431\u0438\u0440\u0441\u043A\u043E\u0439 \u0433\u043B\u0443\u0431\u0438\u043D\u043A\u0435.' # noqa: E501 + publisher = '\u041E\u043B\u044C\u0433\u0430 \u041C\u0443\u0442\u043E\u0432\u0438\u043D\u0430, \u0415\u043B\u0435\u043D\u0430 \u0422\u0440\u0438\u0444\u043E\u043D\u043E\u0432\u0430' # noqa: E501 category = 'blog' cover_url = u'https://baikal-journal.ru/wp-content/themes/baikal/assets/img/logo-full.svg' language = 'ru' diff --git a/recipes/bangkokpost.recipe b/recipes/bangkokpost.recipe index 464ad9800c..259a7bcb6c 100644 --- a/recipes/bangkokpost.recipe +++ b/recipes/bangkokpost.recipe @@ -16,7 +16,7 @@ class BangkokPostRecipe(BasicNewsRecipe): title = u'Bangkok Post' publisher = u'Post Publishing PCL' category = u'News' - description = u'The world\'s window to Thailand' + description = u"The world's window to Thailand" oldest_article = 7 max_articles_per_feed = 100 diff --git a/recipes/bar_and_bench.recipe b/recipes/bar_and_bench.recipe index 9fe81716bb..f6ebe248ad 100644 --- a/recipes/bar_and_bench.recipe +++ b/recipes/bar_and_bench.recipe @@ -26,7 +26,7 @@ class bar(BasicNewsRecipe): prefixed_classes( 'text-story-m_story-tags__ story-footer-module__metype__' ), - dict(name = 'svg') + dict(name='svg') ] def preprocess_html(self, soup): diff --git a/recipes/barrons.recipe b/recipes/barrons.recipe index 4ec30b325c..007605fd0e 100644 --- a/recipes/barrons.recipe +++ b/recipes/barrons.recipe @@ -8,11 +8,11 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes class barrons(BasicNewsRecipe): - title = 'Barron\'s Magazine' + title = "Barron's Magazine" __author__ = 'unkn0wn' description = ( - 'Barron\'s is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister ' - 'publication to The Wall Street Journal, Barron\'s covers U.S. financial information, market developments, and ' + "Barron's is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister " + "publication to The Wall Street Journal, Barron's covers U.S. financial information, market developments, and " 'relevant statistics.' ) language = 'en_US' @@ -82,7 +82,7 @@ class barrons(BasicNewsRecipe): recipe_specific_options = { 'date': { 'short': 'The date of the edition to download (YYYYMMDD format)', - 'long': 'For example, 20240722.\nIf it didn\'t work, try again later.' + 'long': "For example, 20240722.\nIf it didn't work, try again later." 
} } @@ -119,7 +119,7 @@ class barrons(BasicNewsRecipe): byl = articles.find(**prefixed_classes('BarronsTheme--byline--')) if byl: desc += self.tag_to_string(byl) - ttr = articles.find(**prefixed_classes('BarronsTheme--time-to-read--')) + ttr = articles.find(**prefixed_classes('BarronsTheme--time-to-read--')) if ttr: desc += self.tag_to_string(ttr) summ = articles.find(**prefixed_classes('BarronsTheme--summary--')) @@ -127,7 +127,7 @@ class barrons(BasicNewsRecipe): desc += ' | ' + self.tag_to_string(summ) self.log('\t', title, ' ', url, '\n\t', desc) ans[section].append({'title': title, 'url': url, 'description': desc}) - return [(section, articles) for section, articles in ans.items()] + return list(ans.items()) def print_version(self, url): return url.split('?')[0].replace('/articles/', '/amp/articles/') diff --git a/recipes/bbc.recipe b/recipes/bbc.recipe index 7302d02d3f..a872d50550 100644 --- a/recipes/bbc.recipe +++ b/recipes/bbc.recipe @@ -135,9 +135,9 @@ class BBCNews(BasicNewsRecipe): # Select / de-select the feeds you want in your ebook. feeds = [ - ("News Home", "https://feeds.bbci.co.uk/news/rss.xml"), - ("UK", "https://feeds.bbci.co.uk/news/uk/rss.xml"), - ("World", "https://feeds.bbci.co.uk/news/world/rss.xml"), + ('News Home', 'https://feeds.bbci.co.uk/news/rss.xml'), + ('UK', 'https://feeds.bbci.co.uk/news/uk/rss.xml'), + ('World', 'https://feeds.bbci.co.uk/news/world/rss.xml'), # ("England", "https://feeds.bbci.co.uk/news/england/rss.xml"), # ("Scotland", "https://feeds.bbci.co.uk/news/scotland/rss.xml"), # ("Wales", "https://feeds.bbci.co.uk/news/wales/rss.xml"), @@ -147,26 +147,26 @@ class BBCNews(BasicNewsRecipe): # ("Europe", "https://feeds.bbci.co.uk/news/world/europe/rss.xml"), # ("Latin America", "https://feeds.bbci.co.uk/news/world/latin_america/rss.xml"), # ("Middle East", "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml"), - ("US & Canada", "https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"), - ("Politics", "https://feeds.bbci.co.uk/news/politics/rss.xml"), - ("Science/Environment", - "https://feeds.bbci.co.uk/news/science_and_environment/rss.xml"), - ("Technology", "https://feeds.bbci.co.uk/news/technology/rss.xml"), - ("Magazine", "https://feeds.bbci.co.uk/news/magazine/rss.xml"), - ("Entertainment/Arts", - "https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"), + ('US & Canada', 'https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml'), + ('Politics', 'https://feeds.bbci.co.uk/news/politics/rss.xml'), + ('Science/Environment', + 'https://feeds.bbci.co.uk/news/science_and_environment/rss.xml'), + ('Technology', 'https://feeds.bbci.co.uk/news/technology/rss.xml'), + ('Magazine', 'https://feeds.bbci.co.uk/news/magazine/rss.xml'), + ('Entertainment/Arts', + 'https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml'), # ("Health", "https://feeds.bbci.co.uk/news/health/rss.xml"), # ("Education/Family", "https://feeds.bbci.co.uk/news/education/rss.xml"), - ("Business", "https://feeds.bbci.co.uk/news/business/rss.xml"), - ("Special Reports", "https://feeds.bbci.co.uk/news/special_reports/rss.xml"), - ("Also in the News", "https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"), + ('Business', 'https://feeds.bbci.co.uk/news/business/rss.xml'), + ('Special Reports', 'https://feeds.bbci.co.uk/news/special_reports/rss.xml'), + ('Also in the News', 'https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml'), # ("Newsbeat", "https://www.bbc.co.uk/newsbeat/rss.xml"), # ("Click", 
"http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"), # ("Blog: Mark D'Arcy (Parliamentary Correspondent)", "https://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"), # ("Blog: Robert Peston (Business Editor)", "https://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"), # ("Blog: Stephanie Flanders (Economics Editor)", "https://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"), - ("Sport Front Page", - "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"), + ('Sport Front Page', + 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml'), # ("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"), # ("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"), # ("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"), diff --git a/recipes/bbc_brasil.recipe b/recipes/bbc_brasil.recipe index 840c589401..d6e8619cdf 100644 --- a/recipes/bbc_brasil.recipe +++ b/recipes/bbc_brasil.recipe @@ -201,24 +201,24 @@ class BBCBrasilRecipe(BasicNewsRecipe): conversion_options = {'smarten_punctuation': True} # Specify extra CSS - overrides ALL other CSS (IE. Added last). - extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \ - .introduction, .first { font-weight: bold; } \ - .cross-head { font-weight: bold; font-size: 125%; } \ - .cap, .caption { display: block; font-size: 80%; font-style: italic; } \ - .cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \ - .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \ - .correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block; \ - text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \ - .story-date, .published, .datestamp { font-size: 80%; } \ - table { width: 100%; } \ - td img { display: block; margin: 5px auto; } \ - ul { padding-top: 10px; } \ - ol { padding-top: 10px; } \ - li { padding-top: 5px; padding-bottom: 5px; } \ - h1 { text-align: center; font-size: 175%; font-weight: bold; } \ - h2 { text-align: center; font-size: 150%; font-weight: bold; } \ - h3 { text-align: center; font-size: 125%; font-weight: bold; } \ - h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }' + extra_css = '''body { font-family: verdana, helvetica, sans-serif; } + .introduction, .first { font-weight: bold; } + .cross-head { font-weight: bold; font-size: 125%; } + .cap, .caption { display: block; font-size: 80%; font-style: italic; } + .cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } + .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, + .correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block; + text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } + .story-date, .published, .datestamp { font-size: 80%; } + table { width: 100%; } + td img { display: block; margin: 5px auto; } + ul { padding-top: 10px; } + ol { padding-top: 10px; } + li { padding-top: 5px; padding-bottom: 5px; } + h1 { text-align: center; font-size: 175%; font-weight: bold; } + h2 { text-align: center; font-size: 150%; font-weight: bold; } + h3 { text-align: center; font-size: 125%; font-weight: bold; } + h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }''' # Remove various tag attributes to improve 
the look of the ebook pages. remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', @@ -556,19 +556,19 @@ class BBCBrasilRecipe(BasicNewsRecipe): def print_version(self, url): # Handle sports page urls type 01: - if (url.find("go/rss/-/sport1/") != -1): - temp_url = url.replace("go/rss/-/", "") + if (url.find('go/rss/-/sport1/') != -1): + temp_url = url.replace('go/rss/-/', '') # Handle sports page urls type 02: - elif (url.find("go/rss/int/news/-/sport1/") != -1): - temp_url = url.replace("go/rss/int/news/-/", "") + elif (url.find('go/rss/int/news/-/sport1/') != -1): + temp_url = url.replace('go/rss/int/news/-/', '') # Handle regular news page urls: else: - temp_url = url.replace("go/rss/int/news/-/", "") + temp_url = url.replace('go/rss/int/news/-/', '') # Always add "?print=true" to the end of the url. - print_url = temp_url + "?print=true" + print_url = temp_url + '?print=true' return print_url diff --git a/recipes/bbc_es.recipe b/recipes/bbc_es.recipe index 4c2e51ed74..e5a7fd49f4 100644 --- a/recipes/bbc_es.recipe +++ b/recipes/bbc_es.recipe @@ -36,7 +36,7 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe): remove_tags_after = dict(name='div', attrs={'class': [' g-w8']}) remove_tags = [ - dict(name='ul', attrs={'class': ['document-tools blq-clearfix', 'blq-clearfix']}), dict(name='div', attrs={'class': ['box bx-quote-bubble', 'socialmedia-links', 'list li-carousel', 'list li-plain rolling-news', 'list li-plain', 'box bx-livestats', 'li-tab content', 'list li-relatedlinks', 'list li-relatedinternetlinks']}) # noqa + dict(name='ul', attrs={'class': ['document-tools blq-clearfix', 'blq-clearfix']}), dict(name='div', attrs={'class': ['box bx-quote-bubble', 'socialmedia-links', 'list li-carousel', 'list li-plain rolling-news', 'list li-plain', 'box bx-livestats', 'li-tab content', 'list li-relatedlinks', 'list li-relatedinternetlinks']}) # noqa: E501 ] feeds = [ diff --git a/recipes/bbc_fast.recipe b/recipes/bbc_fast.recipe index a53d5a6db9..a7fa84aaa5 100644 --- a/recipes/bbc_fast.recipe +++ b/recipes/bbc_fast.recipe @@ -134,7 +134,7 @@ if __name__ == '__main__': class BBC(BasicNewsRecipe): title = 'BBC News (fast)' __author__ = 'Kovid Goyal' - description = 'Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.' # noqa + description = 'Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.' 
# noqa: E501 oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True diff --git a/recipes/bbc_sport.recipe b/recipes/bbc_sport.recipe index 62d16ef337..9f6a8b7411 100644 --- a/recipes/bbc_sport.recipe +++ b/recipes/bbc_sport.recipe @@ -21,7 +21,7 @@ class BBC(BasicNewsRecipe): category = 'sport, news, UK, world' language = 'en_GB' publication_type = 'newsportal' - extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa + extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa: E501 preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] conversion_options = { 'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True diff --git a/recipes/before_we_go.recipe b/recipes/before_we_go.recipe index 062a02d62e..cb0516083c 100644 --- a/recipes/before_we_go.recipe +++ b/recipes/before_we_go.recipe @@ -1,5 +1,5 @@ #!/usr/bin/env python - # vim:fileencoding=utf-8 +# vim:fileencoding=utf-8 from __future__ import absolute_import, division, print_function, unicode_literals @@ -9,10 +9,10 @@ from calibre.web.feeds.news import BasicNewsRecipe class BeforeWeGo(BasicNewsRecipe): title = 'Before We Go' __author__ = 'bugmen00t' - description = 'Before We Go Blog is a collective of Fantasy, Sci-Fi and Graphic Novel fans from around the world, passionate about providing awesome, enjoyable reviews for anyone who loves a good book!' # noqa + description = 'Before We Go Blog is a collective of Fantasy, Sci-Fi and Graphic Novel fans from around the world, passionate about providing awesome, enjoyable reviews for anyone who loves a good book!' 
# noqa: E501 publisher = 'BEFOREWEGOBLOG' category = 'blog' -# cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/09/beth-with-long-hair.jpeg' + # cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/09/beth-with-long-hair.jpeg' cover_url = u'https://i0.wp.com/beforewegoblog.com/wp-content/uploads/2021/01/before-we-go-blog-1.png' language = 'en' no_stylesheets = True @@ -24,9 +24,9 @@ class BeforeWeGo(BasicNewsRecipe): remove_tags_before = dict(name='h1', attrs={'class': 'entry-title'}) remove_tags_after = dict(name='div', attrs={'id': 'author-bio'}) -# remove_tags_after = dict(name='article') + # remove_tags_after = dict(name='article') - remove_tags = [ + remove_tags = [ dict(name='div', attrs={'class': 'nectar-scrolling-text font_size_10vh custom_color has-custom-divider'}), dict(name='span', attrs={'class': 'meta-comment-count'}), dict(name='p', attrs={'id': 'breadcrumbs'}) diff --git a/recipes/bellingcat_en.recipe b/recipes/bellingcat_en.recipe index 2931ca7196..4c80bf220c 100644 --- a/recipes/bellingcat_en.recipe +++ b/recipes/bellingcat_en.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Bellingcat(BasicNewsRecipe): title = 'Bellingcat' __author__ = 'bugmen00t' - description = 'Bellingcat is an independent international collective of researchers, investigators and citizen journalists using open source and social media investigation to probe a variety of subjects – from Mexican drug lords and crimes against humanity, to tracking the use of chemical weapons and conflicts worldwide. With staff and contributors in more than 20 countries around the world, we operate in a unique field where advanced technology, forensic research, journalism, investigations, transparency and accountability come together.' # noqa + description = 'Bellingcat is an independent international collective of researchers, investigators and citizen journalists using open source and social media investigation to probe a variety of subjects – from Mexican drug lords and crimes against humanity, to tracking the use of chemical weapons and conflicts worldwide. With staff and contributors in more than 20 countries around the world, we operate in a unique field where advanced technology, forensic research, journalism, investigations, transparency and accountability come together.' # noqa: E501 publisher = 'Stichting Bellingcat' category = 'blog' cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg' diff --git a/recipes/bellingcat_es.recipe b/recipes/bellingcat_es.recipe index 239fb6ed15..191a4f8460 100644 --- a/recipes/bellingcat_es.recipe +++ b/recipes/bellingcat_es.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Bellingcat(BasicNewsRecipe): title = 'Bellingcat' __author__ = 'bugmen00t' - description = 'Bellingcat es un colectivo internacional independiente de investigadores y periodistas ciudadanos que usan informaci\u00F3n de fuente abierta y redes sociales para investigar una gran variedad de temas, desde carteles de droga en M\u00E9xico y cr\u00EDmenes de lesa humanidad hasta el rastreo de armas qu\u00EDmicas en zonas de conflicto alrededor del mundo. Nosotros operamos en un campo \u00FAnico donde la tecnolog\u00EDa avanzada, las investigaciones forenses, el periodismo, y la transparencia y responsabilidad se unen.' 
# noqa + description = 'Bellingcat es un colectivo internacional independiente de investigadores y periodistas ciudadanos que usan informaci\u00F3n de fuente abierta y redes sociales para investigar una gran variedad de temas, desde carteles de droga en M\u00E9xico y cr\u00EDmenes de lesa humanidad hasta el rastreo de armas qu\u00EDmicas en zonas de conflicto alrededor del mundo. Nosotros operamos en un campo \u00FAnico donde la tecnolog\u00EDa avanzada, las investigaciones forenses, el periodismo, y la transparencia y responsabilidad se unen.' # noqa: E501 publisher = 'Stichting Bellingcat' category = 'blog' cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg' diff --git a/recipes/bellingcat_fr.recipe b/recipes/bellingcat_fr.recipe index ea119aacc6..14a60e0a5c 100644 --- a/recipes/bellingcat_fr.recipe +++ b/recipes/bellingcat_fr.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Bellingcat(BasicNewsRecipe): title = 'Bellingcat' __author__ = 'bugmen00t' - description = 'Bellingcat est un groupe international ind\u00E9pendant de chercheurs, d\u0027enqu\u00EAteurs et de journalistes citoyens utilisant \u00E0 la fois: enqu\u00EAtes \u0027open source\u0027 et r\u00E9seaux sociaux, pour sonder une vari\u00E9t\u00E9 de sujets - trafiquants de drogue mexicains, crimes contre l\u0027humanit\u00E9, suivi de l\u0027utilisation d\u0027armes chimiques et conflits dans le monde entier. Nous op\u00E9rons dans un domaine unique dans lequel technologie de pointe, recherche m\u00E9dico-l\u00E9gale, journalisme, enqu\u00EAtes, transparence et responsabilit\u00E9 se rencontrent.' # noqa + description = 'Bellingcat est un groupe international ind\u00E9pendant de chercheurs, d\u0027enqu\u00EAteurs et de journalistes citoyens utilisant \u00E0 la fois: enqu\u00EAtes \u0027open source\u0027 et r\u00E9seaux sociaux, pour sonder une vari\u00E9t\u00E9 de sujets - trafiquants de drogue mexicains, crimes contre l\u0027humanit\u00E9, suivi de l\u0027utilisation d\u0027armes chimiques et conflits dans le monde entier. Nous op\u00E9rons dans un domaine unique dans lequel technologie de pointe, recherche m\u00E9dico-l\u00E9gale, journalisme, enqu\u00EAtes, transparence et responsabilit\u00E9 se rencontrent.' 
# noqa: E501 publisher = 'Stichting Bellingcat' category = 'blog' cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg' diff --git a/recipes/bellingcat_ru.recipe b/recipes/bellingcat_ru.recipe index a5884f2b5f..60b27faa85 100644 --- a/recipes/bellingcat_ru.recipe +++ b/recipes/bellingcat_ru.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Bellingcat(BasicNewsRecipe): title = '\u0411\u0435\u043B\u043B\u0438\u043D\u0433\u043A\u044D\u0442 (bell\u00BFngcat)' __author__ = 'bugmen00t' - description = 'Bellingcat \u2014 \u044D\u0442\u043E \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u0430\u044F \u043C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0430\u044F \u043A\u043E\u043C\u0430\u043D\u0434\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u0435\u0439 \u0438 \u0433\u0440\u0430\u0436\u0434\u0430\u043D\u0441\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u044E\u0442 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u0435 \u0438\u0441\u0442\u043E\u0447\u043D\u0438\u043A\u0438 \u0438 \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0441\u0435\u0442\u0438 \u0434\u043B\u044F \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0439 \u043D\u0430 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u2014 \u043E\u0442 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u043A\u0430\u0440\u0442\u0435\u043B\u0435\u0439 \u0438 \u043F\u0440\u0435\u0441\u0442\u0443\u043F\u043B\u0435\u043D\u0438\u0439 \u043F\u0440\u043E\u0442\u0438\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0430 \u0434\u043E \u043E\u0442\u0441\u043B\u0435\u0436\u0438\u0432\u0430\u043D\u0438\u044F \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u044F \u0445\u0438\u043C\u0438\u0447\u0435\u0441\u043A\u043E\u0433\u043E \u043E\u0440\u0443\u0436\u0438\u044F \u0438 \u043A\u043E\u043D\u0444\u043B\u0438\u043A\u0442\u043E\u0432 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u2014 \u0441\u043E\u0447\u0435\u0442\u0430\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0435 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438, \u043A\u0440\u0438\u043C\u0438\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u0438\u0437\u044B, \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0443, \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F \u0438 \u0431\u043E\u0440\u044C\u0431\u0443 \u0437\u0430 \u043F\u0440\u043E\u0437\u0440\u0430\u0447\u043D\u043E\u0441\u0442\u044C \u0438 \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0441\u0442\u044C.' 
# noqa + description = 'Bellingcat \u2014 \u044D\u0442\u043E \u043D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u0430\u044F \u043C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0430\u044F \u043A\u043E\u043C\u0430\u043D\u0434\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u0435\u0439 \u0438 \u0433\u0440\u0430\u0436\u0434\u0430\u043D\u0441\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u044E\u0442 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u0435 \u0438\u0441\u0442\u043E\u0447\u043D\u0438\u043A\u0438 \u0438 \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0441\u0435\u0442\u0438 \u0434\u043B\u044F \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u0439 \u043D\u0430 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0435 \u0442\u0435\u043C\u044B \u2014 \u043E\u0442 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u043A\u0430\u0440\u0442\u0435\u043B\u0435\u0439 \u0438 \u043F\u0440\u0435\u0441\u0442\u0443\u043F\u043B\u0435\u043D\u0438\u0439 \u043F\u0440\u043E\u0442\u0438\u0432 \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0430 \u0434\u043E \u043E\u0442\u0441\u043B\u0435\u0436\u0438\u0432\u0430\u043D\u0438\u044F \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u044F \u0445\u0438\u043C\u0438\u0447\u0435\u0441\u043A\u043E\u0433\u043E \u043E\u0440\u0443\u0436\u0438\u044F \u0438 \u043A\u043E\u043D\u0444\u043B\u0438\u043A\u0442\u043E\u0432 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u2014 \u0441\u043E\u0447\u0435\u0442\u0430\u044F \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0435 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438, \u043A\u0440\u0438\u043C\u0438\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u0438\u0437\u044B, \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0443, \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F \u0438 \u0431\u043E\u0440\u044C\u0431\u0443 \u0437\u0430 \u043F\u0440\u043E\u0437\u0440\u0430\u0447\u043D\u043E\u0441\u0442\u044C \u0438 \u043E\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0435\u043D\u043D\u043E\u0441\u0442\u044C.' 
# noqa: E501
     publisher = 'Stichting Bellingcat'
     category = 'blog'
     cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
diff --git a/recipes/bellingcat_uk.recipe b/recipes/bellingcat_uk.recipe
index fa5cd98be1..5d6a482904 100644
--- a/recipes/bellingcat_uk.recipe
+++ b/recipes/bellingcat_uk.recipe
@@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Bellingcat(BasicNewsRecipe):
     title = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 (bell\u00BFngcat)'
     __author__ = 'bugmen00t'
-    description = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 \u2014 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0438\u0439 \u043C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0438\u0439 \u043A\u043E\u043B\u0435\u043A\u0442\u0438\u0432 \u0434\u043E\u0441\u043B\u0456\u0434\u043D\u0438\u043A\u0456\u0432, \u0441\u043B\u0456\u0434\u0447\u0438\u0445 \u0456 \u0433\u0440\u043E\u043C\u0430\u0434\u044F\u043D\u0441\u044C\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0456\u0432, \u044F\u043A\u0456 \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u043E\u0432\u0443\u044E\u0442\u044C \u0432\u0456\u0434\u043A\u0440\u0438\u0442\u0456 \u0434\u0436\u0435\u0440\u0435\u043B\u0430 \u0442\u0430 \u0441\u043E\u0446\u0456\u0430\u043B\u044C\u043D\u0456 \u043C\u0435\u0440\u0435\u0436\u0456 \u0434\u043B\u044F \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F \u0440\u0456\u0437\u043D\u043E\u043C\u0430\u043D\u0456\u0442\u043D\u0438\u0445 \u0442\u0435\u043C \u2014 \u0432\u0456\u0434 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u044C\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u0431\u0430\u0440\u043E\u043D\u0456\u0432 \u0456 \u0437\u043B\u043E\u0447\u0438\u043D\u0456\u0432 \u043F\u0440\u043E\u0442\u0438 \u043B\u044E\u0434\u0441\u0442\u0432\u0430, \u0434\u043E \u0432\u0456\u0434\u0441\u0442\u0435\u0436\u0435\u043D\u043D\u044F \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u0430\u043D\u043D\u044F \u0445\u0456\u043C\u0456\u0447\u043D\u043E\u0457 \u0437\u0431\u0440\u043E\u0457 \u0442\u0430 \u043A\u043E\u043D\u0444\u043B\u0456\u043A\u0442\u0456\u0432 \u0443 \u0432\u0441\u044C\u043E\u043C\u0443 \u0441\u0432\u0456\u0442\u0456. \u041C\u0438 \u043F\u0440\u0430\u0446\u044E\u0454\u043C\u043E \u0432 \u0443\u043D\u0456\u043A\u0430\u043B\u044C\u043D\u0456\u0439 \u0441\u0444\u0435\u0440\u0456, \u0434\u0435 \u043F\u0435\u0440\u0435\u0434\u043E\u0432\u0456 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0441\u0443\u0434\u043E\u0432\u043E-\u043C\u0435\u0434\u0438\u0447\u043D\u0456 \u0434\u043E\u0441\u043B\u0456\u0434\u0436\u0435\u043D\u043D\u044F, \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0438\u043A\u0430, \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F, \u043F\u0440\u043E\u0437\u043E\u0440\u0456\u0441\u0442\u044C \u0456 \u0432\u0456\u0434\u043F\u043E\u0432\u0456\u0434\u0430\u043B\u044C\u043D\u0456\u0441\u0442\u044C \u043E\u0431\u2019\u0454\u0434\u043D\u0443\u044E\u0442\u044C\u0441\u044F.'  # noqa
+    description = '\u0411\u0435\u043B\u043B\u0456\u043D\u0433\u043A\u0435\u0442 \u2014 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0438\u0439 \u043C\u0456\u0436\u043D\u0430\u0440\u043E\u0434\u043D\u0438\u0439 \u043A\u043E\u043B\u0435\u043A\u0442\u0438\u0432 \u0434\u043E\u0441\u043B\u0456\u0434\u043D\u0438\u043A\u0456\u0432, \u0441\u043B\u0456\u0434\u0447\u0438\u0445 \u0456 \u0433\u0440\u043E\u043C\u0430\u0434\u044F\u043D\u0441\u044C\u043A\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0456\u0432, \u044F\u043A\u0456 \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u043E\u0432\u0443\u044E\u0442\u044C \u0432\u0456\u0434\u043A\u0440\u0438\u0442\u0456 \u0434\u0436\u0435\u0440\u0435\u043B\u0430 \u0442\u0430 \u0441\u043E\u0446\u0456\u0430\u043B\u044C\u043D\u0456 \u043C\u0435\u0440\u0435\u0436\u0456 \u0434\u043B\u044F \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F \u0440\u0456\u0437\u043D\u043E\u043C\u0430\u043D\u0456\u0442\u043D\u0438\u0445 \u0442\u0435\u043C \u2014 \u0432\u0456\u0434 \u043C\u0435\u043A\u0441\u0438\u043A\u0430\u043D\u0441\u044C\u043A\u0438\u0445 \u043D\u0430\u0440\u043A\u043E\u0431\u0430\u0440\u043E\u043D\u0456\u0432 \u0456 \u0437\u043B\u043E\u0447\u0438\u043D\u0456\u0432 \u043F\u0440\u043E\u0442\u0438 \u043B\u044E\u0434\u0441\u0442\u0432\u0430, \u0434\u043E \u0432\u0456\u0434\u0441\u0442\u0435\u0436\u0435\u043D\u043D\u044F \u0432\u0438\u043A\u043E\u0440\u0438\u0441\u0442\u0430\u043D\u043D\u044F \u0445\u0456\u043C\u0456\u0447\u043D\u043E\u0457 \u0437\u0431\u0440\u043E\u0457 \u0442\u0430 \u043A\u043E\u043D\u0444\u043B\u0456\u043A\u0442\u0456\u0432 \u0443 \u0432\u0441\u044C\u043E\u043C\u0443 \u0441\u0432\u0456\u0442\u0456. \u041C\u0438 \u043F\u0440\u0430\u0446\u044E\u0454\u043C\u043E \u0432 \u0443\u043D\u0456\u043A\u0430\u043B\u044C\u043D\u0456\u0439 \u0441\u0444\u0435\u0440\u0456, \u0434\u0435 \u043F\u0435\u0440\u0435\u0434\u043E\u0432\u0456 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0441\u0443\u0434\u043E\u0432\u043E-\u043C\u0435\u0434\u0438\u0447\u043D\u0456 \u0434\u043E\u0441\u043B\u0456\u0434\u0436\u0435\u043D\u043D\u044F, \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0438\u043A\u0430, \u0440\u043E\u0437\u0441\u043B\u0456\u0434\u0443\u0432\u0430\u043D\u043D\u044F, \u043F\u0440\u043E\u0437\u043E\u0440\u0456\u0441\u0442\u044C \u0456 \u0432\u0456\u0434\u043F\u043E\u0432\u0456\u0434\u0430\u043B\u044C\u043D\u0456\u0441\u0442\u044C \u043E\u0431\u2019\u0454\u0434\u043D\u0443\u044E\u0442\u044C\u0441\u044F.'  # noqa: E501
     publisher = 'Stichting Bellingcat'
     category = 'blog'
     cover_url = u'https://www.bellingcat.com/app/uploads/2018/04/bellingcat_HP_logo_black.jpg'
diff --git a/recipes/benchmark_pl.recipe b/recipes/benchmark_pl.recipe
index 3b588b72dd..e187756553 100644
--- a/recipes/benchmark_pl.recipe
+++ b/recipes/benchmark_pl.recipe
@@ -17,13 +17,13 @@ class BenchmarkPl(BasicNewsRecipe):
     extra_css = 'ul {list-style-type: none;}'
     no_stylesheets = True
     use_embedded_content = False
-    preprocess_regexps = [(re.compile(u'Zobacz poprzednie Opinie dnia:.*',  # noqa
-                          re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'Więcej o .*?', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa
+    preprocess_regexps = [(re.compile(u'Zobacz poprzednie Opinie dnia:.*',  # noqa: E501, RUF039
+                          re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'Więcej o .*?', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa: RUF039
     keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(
         name='div', attrs={'class': ['m_zwykly', 'gallery']}), dict(id='article')]
     remove_tags_after = dict(id='article')
-    remove_tags = [dict(name='div', attrs={'class': ['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs={ 'background': 'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width': '210', 'cellspacing': '1', 'cellpadding': '4', 'border': '0', 'align': 'right'})]  # noqa
+    remove_tags = [dict(name='div', attrs={'class': ['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs={'background': 'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width': '210', 'cellspacing': '1', 'cellpadding': '4', 'border': '0', 'align': 'right'})]  # noqa: E501
     INDEX = 'http://www.benchmark.pl'
 
     feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
diff --git a/recipes/berfreunde_blog.recipe b/recipes/berfreunde_blog.recipe
index 997588ecd8..cebf908515 100644
--- a/recipes/berfreunde_blog.recipe
+++ b/recipes/berfreunde_blog.recipe
@@ -25,7 +25,6 @@ class bergfreunde_blog(BasicNewsRecipe):
     __author__ = 'VoHe'
     no_stylesheets = True
     remove_javascript = True
-    remove_javascript = True
     remove_empty_feeds = True
     ignore_duplicate_articles = {'title', 'url'}
diff --git a/recipes/bild_de.recipe b/recipes/bild_de.recipe
index a4150f80fc..2801ed376b 100644
--- a/recipes/bild_de.recipe
+++ b/recipes/bild_de.recipe
@@ -63,12 +63,12 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
         dict(
             attrs={'class': ['socialbar', 'social-sharing flank', 'vel', 'back']}),
         dict(name='img', attrs={'alt': 'logo'}),
-        dict(name='div', attrs={'class': re.compile('infoEl')}),
-        dict(name='span', attrs={'class': re.compile('loupe')})
+        dict(name='div', attrs={'class': re.compile(r'infoEl')}),
+        dict(name='span', attrs={'class': re.compile(r'loupe')})
     ]
 
     remove_tags_after = [
-        dict(name='div', attrs={'itemprop': re.compile('articleBody')})
+        dict(name='div', attrs={'itemprop': re.compile(r'articleBody')})
     ]
 
     def preprocess_html(self, soup):
diff --git a/recipes/billorielly.recipe b/recipes/billorielly.recipe
index 3369cc49e2..bb87260362 100644
--- a/recipes/billorielly.recipe
+++ b/recipes/billorielly.recipe
@@ -30,7 +30,7 @@ class BillOReilly(BasicNewsRecipe):
             feeds.append(("O'Reilly Factor", articles_shows))
 
         if articles_columns:
-            feeds.append(("Newspaper Column", articles_columns))
+            feeds.append(('Newspaper Column', articles_columns))
 
         return feeds
 
@@ -49,8 +49,7 @@ class BillOReilly(BasicNewsRecipe):
                 continue
 
             if url.startswith('/'):
-                url = 'http://www.billoreilly.com' + url + \
-                    '&dest=/pg/jsp/community/tvshowprint.jsp'
+                url = 'http://www.billoreilly.com' + url + '&dest=/pg/jsp/community/tvshowprint.jsp'
 
             self.log('\t\tFound article:', title)
             self.log('\t\t\t', url)
diff --git a/recipes/birmingham_evening_mail.recipe b/recipes/birmingham_evening_mail.recipe
index 8217f13d73..8549dd2216 100644
--- a/recipes/birmingham_evening_mail.recipe
+++ b/recipes/birmingham_evening_mail.recipe
@@ -57,8 +57,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
 
     def get_cover_url(self):
         soup = self.index_to_soup('http://www.birminghammail.co.uk')
-        cov = soup.find(attrs={'src': re.compile(
-            'http://images.icnetwork.co.uk/upl/birm')})
+        cov = soup.find(attrs={'src': re.compile(r'http://images.icnetwork.co.uk/upl/birm')})
         cov = str(cov)
         cov2 = re.findall(
             r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
diff --git a/recipes/blesk.recipe b/recipes/blesk.recipe
index 8757bf4dcf..3a50ee8b8b 100644
--- a/recipes/blesk.recipe
+++ b/recipes/blesk.recipe
@@ -27,15 +27,15 @@ class bleskRecipe(BasicNewsRecipe):
     cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
     remove_javascript = True
     no_stylesheets = True
-    extra_css = """
-    """
+    extra_css = '''
+    '''
 
     remove_attributes = []
     remove_tags_before = dict(name='div', attrs={'id': ['boxContent']})
     remove_tags_after = dict(name='div', attrs={'class': ['artAuthors']})
-    remove_tags = [dict(name='div', attrs={'class': ['link_clanek']}),
-                   dict(name='div', attrs={'id': ['partHeader']}),
-                   dict(name='div', attrs={'id': ['top_bottom_box', 'lista_top']})]
+    remove_tags = [dict(name='div', attrs={'class': ['link_clanek']}),
+                   dict(name='div', attrs={'id': ['partHeader']}),
+                   dict(name='div', attrs={'id': ['top_bottom_box', 'lista_top']})]
     preprocess_regexps = [(re.compile(r'')]
diff --git a/recipes/blic.recipe b/recipes/blic.recipe
index 7368ec530e..ec44e53123 100644
--- a/recipes/blic.recipe
+++ b/recipes/blic.recipe
@@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Blic(BasicNewsRecipe):
     title = 'Blic'
     __author__ = 'Darko Miletic'
-    description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'  # noqa
+    description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'  # noqa: E501
     publisher = 'RINGIER d.o.o.'
     category = 'news, politics, Serbia'
     oldest_article = 2
@@ -23,7 +23,7 @@ class Blic(BasicNewsRecipe):
     masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png'
     language = 'sr'
     publication_type = 'newspaper'
-    extra_css = """
+    extra_css = '''
                 @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
                 @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
                 body{font-family: Georgia, serif1, serif}
@@ -35,13 +35,13 @@ class Blic(BasicNewsRecipe):
     .potpis{font-size: x-small; color: gray}
     .article_info{font-size: small}
     img{margin-bottom: 0.8em; margin-top: 0.8em; display: block}
-    """
+    '''
 
     conversion_options = {
         'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
     }
 
-    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]  # noqa: RUF039
 
     remove_tags_before = dict(name='div', attrs={'id': 'article_info'})
     remove_tags = [
         dict(name=['object', 'link', 'meta', 'base', 'object', 'embed'])]
diff --git a/recipes/bloomberg-business-week.recipe b/recipes/bloomberg-business-week.recipe
index 8d3753fc20..e13167ca14 100644
--- a/recipes/bloomberg-business-week.recipe
+++ b/recipes/bloomberg-business-week.recipe
@@ -56,7 +56,7 @@ class Bloomberg(BasicNewsRecipe):
     masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg'
     description = (
         'Bloomberg Businessweek helps global leaders stay ahead with insights and in-depth analysis on the people,'
-        ' companies, events, and trends shaping today\'s complex, global economy.'
+        " companies, events, and trends shaping today's complex, global economy."
     )
 
     remove_empty_feeds = True
@@ -124,8 +124,8 @@ class Bloomberg(BasicNewsRecipe):
             cat = '' + data['primaryCategory'] + ''
 
         if 'abstract' in data and data['abstract'] and data['abstract'] is not None:
-            subhead = '  • ' + '  • '.join([x for x in data['abstract']]) + ''
-        elif 'summary' in data and data['summary']:
+            subhead = '  • ' + '  • '.join(list(data['abstract'])) + ''
+        elif data.get('summary'):
             subhead = '' + data['summary'] + ''
 
         if 'byline' in data and data['byline'] is not None:
diff --git a/recipes/bloomberg.recipe b/recipes/bloomberg.recipe
index e2bc3f3831..40846337dd 100644
--- a/recipes/bloomberg.recipe
+++ b/recipes/bloomberg.recipe
@@ -58,7 +58,7 @@ class Bloomberg(BasicNewsRecipe):
         'Bloomberg delivers business and markets news, data, analysis, and video'
         ' to the world, featuring stories from Businessweek and Bloomberg News.'
     )
-    oldest_article = 1.2 # days
+    oldest_article = 1.2  # days
     resolve_internal_links = True
     remove_empty_feeds = True
     cover_url = 'https://assets.bwbx.io/images/users/iqjWHBFdfxIU/ivUxvlPidC3M/v0/600x-1.jpg'
@@ -134,8 +134,8 @@ class Bloomberg(BasicNewsRecipe):
             cat = '' + data['primaryCategory'] + ''
 
         if 'abstract' in data and data['abstract'] and data['abstract'] is not None:
-            subhead = '  • ' + '  • '.join([x for x in data['abstract']]) + ''
-        elif 'summary' in data and data['summary']:
+            subhead = '  • ' + '  • '.join(list(data['abstract'])) + ''
+        elif data.get('summary'):
             subhead = '' + data['summary'] + ''
 
         if 'byline' in data and data['byline'] is not None:
diff --git a/recipes/bookforummagazine.recipe b/recipes/bookforummagazine.recipe
index cab082a8e3..f5e8cbdb98 100644
--- a/recipes/bookforummagazine.recipe
+++ b/recipes/bookforummagazine.recipe
@@ -2,29 +2,29 @@ from urllib.parse import urljoin
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
-_issue_url = ""
+_issue_url = ''
 
 
 class BookforumMagazine(BasicNewsRecipe):
-    title = "Bookforum"
+    title = 'Bookforum'
     description = (
-        "Bookforum is an American book review magazine devoted to books and "
-        "the discussion of literature. https://www.bookforum.com/print"
+        'Bookforum is an American book review magazine devoted to books and '
+        'the discussion of literature. https://www.bookforum.com/print'
     )
-    language = "en"
-    __author__ = "ping"
-    publication_type = "magazine"
-    encoding = "utf-8"
+    language = 'en'
+    __author__ = 'ping'
+    publication_type = 'magazine'
+    encoding = 'utf-8'
     remove_javascript = True
     no_stylesheets = True
    auto_cleanup = False
     compress_news_images = True
     compress_news_images_auto_size = 8
-    keep_only_tags = [dict(class_="blog-article")]
-    remove_tags = [dict(name=["af-share-toggle", "af-related-articles"])]
+    keep_only_tags = [dict(class_='blog-article')]
+    remove_tags = [dict(name=['af-share-toggle', 'af-related-articles'])]
 
-    extra_css = """
+    extra_css = '''
     .blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; }
     .blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
     .blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; }
@@ -33,46 +33,46 @@ class BookforumMagazine(BasicNewsRecipe):
         display: block; max-width: 100%; height: auto;
     }
     .blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; }
-    """
+    '''
 
     def preprocess_html(self, soup):
         # strip away links that's not needed
-        for ele in soup.select(".blog-article__header a"):
+        for ele in soup.select('.blog-article__header a'):
             ele.unwrap()
         return soup
 
     def parse_index(self):
         soup = self.index_to_soup(
-            _issue_url if _issue_url else "https://www.bookforum.com/print"
+            _issue_url if _issue_url else 'https://www.bookforum.com/print'
         )
-        meta_ele = soup.find("meta", property="og:title")
+        meta_ele = soup.find('meta', property='og:title')
         if meta_ele:
             self.timefmt = f' [{meta_ele["content"]}]'
 
-        cover_ele = soup.find("img", class_="toc-issue__cover")
+        cover_ele = soup.find('img', class_='toc-issue__cover')
         if cover_ele:
             self.cover_url = urljoin(
-                "https://www.bookforum.com",
-                soup.find("img", class_="toc-issue__cover")["src"],
+                'https://www.bookforum.com',
+                soup.find('img', class_='toc-issue__cover')['src'],
             )
 
         articles = {}
-        for sect_ele in soup.find_all("div", class_="toc-articles__section"):
+        for sect_ele in soup.find_all('div', class_='toc-articles__section'):
             section_name = self.tag_to_string(
-                sect_ele.find("a", class_="toc__anchor-links__link")
+                sect_ele.find('a', class_='toc__anchor-links__link')
             )
-            for article_ele in sect_ele.find_all("article"):
-                title_ele = article_ele.find("h1")
-                sub_title_ele = article_ele.find(class_="toc-article__subtitle")
+            for article_ele in sect_ele.find_all('article'):
+                title_ele = article_ele.find('h1')
+                sub_title_ele = article_ele.find(class_='toc-article__subtitle')
                 articles.setdefault(section_name, []).append(
                     {
-                        "title": self.tag_to_string(title_ele),
-                        "url": article_ele.find("a", class_="toc-article__link")[
-                            "href"
+                        'title': self.tag_to_string(title_ele),
+                        'url': article_ele.find('a', class_='toc-article__link')[
+                            'href'
                         ],
-                        "description": self.tag_to_string(sub_title_ele)
+                        'description': self.tag_to_string(sub_title_ele)
                         if sub_title_ele
-                        else "",
+                        else '',
                     }
                 )
         return articles.items()
diff --git a/recipes/borsen_dk.recipe b/recipes/borsen_dk.recipe
index 3a4e47f345..5414105b39 100644
--- a/recipes/borsen_dk.recipe
+++ b/recipes/borsen_dk.recipe
@@ -22,9 +22,9 @@ class Borsen_dk(BasicNewsRecipe):
     language = 'da'
 
     keep_only_tags = [
-        dict(name="h1", attrs={'itemprop': 'headline'}),
-        dict(name="div", attrs={'itemprob': 'datePublished'}),
-        dict(name="div", attrs={'itemprop': 'articleBody'}),
+        dict(name='h1', attrs={'itemprop': 'headline'}),
+        dict(name='div', attrs={'itemprob': 'datePublished'}),
+        dict(name='div', attrs={'itemprop': 'articleBody'}),
     ]
 
     # Feed are found here:
diff --git a/recipes/boston.com.recipe b/recipes/boston.com.recipe
index 82f75c53d4..e4cbd3cd2f 100644
--- a/recipes/boston.com.recipe
+++ b/recipes/boston.com.recipe
@@ -42,24 +42,24 @@ def class_startswith(*prefixes):
 
 # From: https://www3.bostonglobe.com/lifestyle/comics?arc404=true
 comics_to_fetch = {
-    "ADAM@HOME": 'ad',
-    "ARLO & JANIS": 'aj',
+    'ADAM@HOME': 'ad',
+    'ARLO & JANIS': 'aj',
     # "CUL DE SAC": 'cds',
     # "CURTIS": 'kfcrt',
-    "DILBERT": 'dt',
-    "DOONESBURY": 'db',
-    "DUSTIN": 'kfdus',
-    "F MINUS": 'fm',
-    "FOR BETTER OR WORSE": 'fb',
+    'DILBERT': 'dt',
+    'DOONESBURY': 'db',
+    'DUSTIN': 'kfdus',
+    'F MINUS': 'fm',
+    'FOR BETTER OR WORSE': 'fb',
     # "GET FUZZY": 'gz',
     # "MOTHER GOOSE & GRIMM": 'tmmgg',
     # "JUMPSTART": 'jt',
-    "MONTY": 'mt',
+    'MONTY': 'mt',
     # "POOCH CAFE",
-    "RHYMES WITH ORANGE": 'kfrwo',
+    'RHYMES WITH ORANGE': 'kfrwo',
     # "ROSE IS ROSE": 'rr',
     # "ZIPPY THE PINHEAD": 'kfzpy',
-    "ZITS": 'kfzt'
+    'ZITS': 'kfzt'
 }
 
 
@@ -77,10 +77,10 @@ def extract_json(raw_html):
 
 
 def absolutize_url(url):
-    if url.startswith("//"):
-        return "https:" + url
+    if url.startswith('//'):
+        return 'https:' + url
     if url.startswith('/'):
-        url = "https://www.bostonglobe.com" + url
+        url = 'https://www.bostonglobe.com' + url
     return url
 
 
@@ -120,7 +120,7 @@ def main():
 
 
 class BostonGlobeSubscription(BasicNewsRecipe):
-    title = "Boston Globe"
+    title = 'Boston Globe'
     __author__ = 'Kovid Goyal'
     description = 'The Boston Globe'
     language = 'en_US'
diff --git a/recipes/boston_globe_print_edition.recipe b/recipes/boston_globe_print_edition.recipe
index 8c5e81df7c..7a48a86b43 100644
--- a/recipes/boston_globe_print_edition.recipe
+++ b/recipes/boston_globe_print_edition.recipe
@@ -12,6 +12,7 @@ def class_as_string(x):
         x = ' '.join(x)
     return x
 
+
 def class_startswith(*prefixes):
 
     def q(x):
@@ -24,18 +25,19 @@ def class_startswith(*prefixes):
 
     return dict(attrs={'class': q})
 
+
 def absolutize_url(url):
-    if url.startswith("//"):
-        return "https:" + url
+    if url.startswith('//'):
+        return 'https:' + url
     if url.startswith('/'):
-        url = "https://www.bostonglobe.com" + url
+        url = 'https://www.bostonglobe.com' + url
     return url
 
 
 class BostonGlobePrint(BasicNewsRecipe):
-    title = "Boston Globe | Print Edition"
+    title = 'Boston Globe | Print Edition'
     __author__ = 'Kovid Goyal, unkn0wn'
-    description = 'The Boston Globe - Today\'s Paper'
+    description = "The Boston Globe - Today's Paper"
     language = 'en_US'
 
     keep_only_tags = [
@@ -70,7 +72,7 @@ class BostonGlobePrint(BasicNewsRecipe):
         for image in soup.findAll('img', src=True):
             if image['src'].endswith('750.jpg'):
                 return 'https:' + image['src']
-        self.log("\nCover unavailable")
+        self.log('\nCover unavailable')
         cover = None
         return cover
 
@@ -94,8 +96,8 @@ class BostonGlobePrint(BasicNewsRecipe):
                 desc = self.tag_to_string(d)
             self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url)
-            feeds_dict[section].append({"title": title, "url": url, "description": desc})
-        return [(section, articles) for section, articles in feeds_dict.items()]
+            feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
+        return list(feeds_dict.items())
 
     def preprocess_raw_html(self, raw_html, url):
         soup = self.index_to_soup(raw_html)
diff --git a/recipes/breaking_mad.recipe b/recipes/breaking_mad.recipe
index e0b0574b65..d9cc454363 100644
--- a/recipes/breaking_mad.recipe
+++ b/recipes/breaking_mad.recipe
@@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1467715002(BasicNewsRecipe):
     title = 'Breaking Mad'
     __author__ = 'bugmen00t'
-    description = 'Breaking Mad - пока смерть не насмешит нас. Breakingmad - это агрегатор самых странных и драматичных новостей с жизнеутверждающим эффектом. Breakingmad - это все, что вы должны знать о России и не только.'  # noqa
+    description = 'Breaking Mad - пока смерть не насмешит нас. Breakingmad - это агрегатор самых странных и драматичных новостей с жизнеутверждающим эффектом. Breakingmad - это все, что вы должны знать о России и не только.'  # noqa: E501
     publisher = 'BreakingMad'
     category = 'news'
     cover_url = u'http://breakingmad.me/images/logo.png'
diff --git a/recipes/brewiarz.recipe b/recipes/brewiarz.recipe
index ada68ebd17..0eb5cf0542 100644
--- a/recipes/brewiarz.recipe
+++ b/recipes/brewiarz.recipe
@@ -5,7 +5,6 @@ from __future__ import print_function
 __license__ = 'GPL v3'
 
 import datetime
-import re
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
@@ -23,40 +22,39 @@ class brewiarz(BasicNewsRecipe):
     next_days = 1
 
     def parse_index(self):
-        dec2rom_dict = {"01": "i", "02": "ii", "03": "iii", "04": "iv",
-                        "05": "v", "06": "vi", "07": "vii", "08": "viii",
-                        "09": "ix", "10": "x", "11": "xi", "12": "xii"}
+        dec2rom_dict = {'01': 'i', '02': 'ii', '03': 'iii', '04': 'iv',
+                        '05': 'v', '06': 'vi', '07': 'vii', '08': 'viii',
+                        '09': 'ix', '10': 'x', '11': 'xi', '12': 'xii'}
 
-        weekday_dict = {"Sunday": "Niedziela", "Monday": "Poniedziałek", "Tuesday": "Wtorek",
-                        "Wednesday": "Środa", "Thursday": "Czwartek", "Friday": "Piątek", "Saturday": "Sobota"}
+        weekday_dict = {'Sunday': 'Niedziela', 'Monday': 'Poniedziałek', 'Tuesday': 'Wtorek',
+                        'Wednesday': 'Środa', 'Thursday': 'Czwartek', 'Friday': 'Piątek', 'Saturday': 'Sobota'}
 
         now = datetime.datetime.now()
 
         feeds = []
-        for i in range(0, self.next_days):
+        for i in range(self.next_days):
             url_date = now + datetime.timedelta(days=i)
-            url_date_month = url_date.strftime("%m")
+            url_date_month = url_date.strftime('%m')
             url_date_month_roman = dec2rom_dict[url_date_month]
-            url_date_day = url_date.strftime("%d")
-            url_date_year = url_date.strftime("%Y")[2:]
-            url_date_weekday = url_date.strftime("%A")
+            url_date_day = url_date.strftime('%d')
+            url_date_year = url_date.strftime('%Y')[2:]
+            url_date_weekday = url_date.strftime('%A')
             url_date_weekday_pl = weekday_dict[url_date_weekday]
 
-            url = "http://brewiarz.pl/" + url_date_month_roman + "_" + \
-                url_date_year + "/" + url_date_day + url_date_month + "/index.php3"
+            url = ('http://brewiarz.pl/' + url_date_month_roman + '_' +
+                   url_date_year + '/' + url_date_day + url_date_month + '/index.php3')
             articles = self.parse_pages(url)
             if articles:
-                title = url_date_weekday_pl + " " + url_date_day + \
-                    "." + url_date_month + "." + url_date_year
+                title = (url_date_weekday_pl + ' ' + url_date_day +
+                         '.' + url_date_month + '.' + url_date_year)
                 feeds.append((title, articles))
             else:
                 sectors = self.get_sectors(url)
                 for subpage in sectors:
-                    title = url_date_weekday_pl + " " + url_date_day + "." + \
-                        url_date_month + "." + url_date_year + " - " + subpage.string
-                    url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + \
-                        "/" + url_date_day + url_date_month + \
-                        "/" + subpage['href']
+                    title = (url_date_weekday_pl + ' ' + url_date_day + '.' +
+                             url_date_month + '.' + url_date_year + ' - ' + subpage.string)
+                    url = ('http://brewiarz.pl/' + url_date_month_roman + '_' + url_date_year +
+                           '/' + url_date_day + url_date_month + '/' + subpage['href'])
                     print(url)
                     articles = self.parse_pages(url)
                     if articles:
@@ -91,9 +89,8 @@ class brewiarz(BasicNewsRecipe):
                     sublinks = ol.findAll(name='a')
                     for sublink in sublinks:
                         link_title = self.tag_to_string(
-                            link) + " - " + self.tag_to_string(sublink)
-                        link_url_print = re.sub(
-                            'php3', 'php3?kr=_druk&wr=lg&', sublink['href'])
+                            link) + ' - ' + self.tag_to_string(sublink)
+                        link_url_print = sublink['href'].replace('php3', 'php3?kr=_druk&wr=lg&')
                         link_url = url[:-10] + link_url_print
                         current_articles.append({'title': link_title, 'url': link_url, 'description': '', 'date': ''})
@@ -102,8 +99,7 @@ class brewiarz(BasicNewsRecipe):
                         continue
                 else:
                     link_title = self.tag_to_string(link)
-                    link_url_print = re.sub(
-                        'php3', 'php3?kr=_druk&wr=lg&', link['href'])
+                    link_url_print = link['href'].replace('php3', 'php3?kr=_druk&wr=lg&')
                     link_url = url[:-10] + link_url_print
                     current_articles.append({'title': link_title, 'url': link_url, 'description': '', 'date': ''})
@@ -145,7 +141,7 @@ class brewiarz(BasicNewsRecipe):
                     if x == tag:
                         break
                 else:
-                    print("Can't find", tag, "in", tag.parent)
+                    print("Can't find", tag, 'in', tag.parent)
                     continue
                 for r in reversed(tag.contents):
                     tag.parent.insert(i, r)
diff --git a/recipes/brigitte_de.recipe b/recipes/brigitte_de.recipe
index 0033cd88fd..8810e4b636 100644
--- a/recipes/brigitte_de.recipe
+++ b/recipes/brigitte_de.recipe
@@ -16,7 +16,7 @@ class AdvancedUserRecipe(BasicNewsRecipe):
     cover_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
     masthead_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
 
-    remove_tags = [dict(attrs={'class': ['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}),  # noqa
+    remove_tags = [dict(attrs={'class': ['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}),  # noqa: E501
                    dict(id=['header', 'artTools', 'context', 'interact', 'footer-navigation', 'bwNet', 'copy', 'keyboardNavigationHint']),
                    dict(name=['hjtrs', 'kud'])]
diff --git a/recipes/business_insider.recipe b/recipes/business_insider.recipe
index d04913ea17..e08b84b1e0 100644
--- a/recipes/business_insider.recipe
+++ b/recipes/business_insider.recipe
@@ -22,10 +22,10 @@ class Business_insider(BasicNewsRecipe):
     remove_empty_feeds = True
     publication_type = 'newsportal'
     masthead_url = 'http://static.businessinsider.com/assets/images/logos/tbi_print.jpg'
-    extra_css = """
+    extra_css = '''
                 body{font-family: Arial,Helvetica,sans-serif }
                 img{margin-bottom: 0.4em; display:block}
-                """
+                '''
 
     conversion_options = {
         'comment': description, 'tags': category, 'publisher': publisher, 'language': language
diff --git a/recipes/business_standard_print.recipe b/recipes/business_standard_print.recipe
index f4dc6d6de1..439284b28d 100644
--- a/recipes/business_standard_print.recipe
+++ b/recipes/business_standard_print.recipe
@@ -15,8 +15,6 @@ class BusinessStandardPrint(BasicNewsRecipe):
     language = 'en_IN'
     masthead_url = 'https://bsmedia.business-standard.com/include/_mod/site/html5/images/business-standard-logo.png'
     encoding = 'utf-8'
-    resolve_internal_links = True
-    remove_empty_feeds = True
     no_stylesheets = True
     remove_javascript = True
 
@@ -64,7 +62,7 @@ class BusinessStandardPrint(BasicNewsRecipe):
         if dt.weekday() == 6:
             self.log.warn(
                 'Business Standard Does Not Have A Print Publication On Sunday. The Reports'
-                ' And Columns On This Page Today Appeared In The Newspaper\'s Saturday Edition.'
+                " And Columns On This Page Today Appeared In The Newspaper's Saturday Edition."
             )
         url = 'https://apibs.business-standard.com/category/today-paper?sortBy=' + today
         raw = self.index_to_soup(url, raw=True)
diff --git a/recipes/business_today.recipe b/recipes/business_today.recipe
index b8188de06a..cab351e15a 100644
--- a/recipes/business_today.recipe
+++ b/recipes/business_today.recipe
@@ -90,7 +90,7 @@ class BT(BasicNewsRecipe):
 
         # Insert feeds in specified order, if available
 
-        feedSort = ['Editor\'s Note', 'Editors note']
+        feedSort = ["Editor's Note", 'Editors note']
         for i in feedSort:
             if i in sections:
                 feeds.append((i, sections[i]))
@@ -98,8 +98,7 @@ class BT(BasicNewsRecipe):
         # Done with the sorted feeds
 
         for i in feedSort:
-            if i in sections:
-                del sections[i]
+            sections.pop(i, None)
 
         # Append what is left over...
diff --git a/recipes/cacm.recipe b/recipes/cacm.recipe
index aee3c68eca..201312128a 100644
--- a/recipes/cacm.recipe
+++ b/recipes/cacm.recipe
@@ -5,8 +5,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 
 class CACM(BasicNewsRecipe):
-    title = "ACM CACM Magazine"
-    description = "Published on day 1 of every month."
+    title = 'ACM CACM Magazine'
+    description = 'Published on day 1 of every month.'
     language = 'en'
     oldest_article = 30
     max_articles_per_feed = 100
@@ -17,16 +17,16 @@ class CACM(BasicNewsRecipe):
     ]
 
     def get_cover_url(self):
-        """
+        '''
         Parse out cover URL from cover page.
 
        Example:
        From: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.large.jpg?1647524668&1647524668
        Get: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.jpg
-        """
+        '''
 
-        soup = self.index_to_soup("https://cacm.acm.org/")
-        a_img = soup.find("a", class_="menuCover")
-        img_url = a_img.img["src"]
-        img_url = img_url.split("?")[0]
-        img_url = img_url.replace(".large", "")
+        soup = self.index_to_soup('https://cacm.acm.org/')
+        a_img = soup.find('a', class_='menuCover')
+        img_url = a_img.img['src']
+        img_url = img_url.split('?')[0]
+        img_url = img_url.replace('.large', '')
         return img_url
diff --git a/recipes/calcalist.recipe b/recipes/calcalist.recipe
index d794946412..32f80cdb25 100644
--- a/recipes/calcalist.recipe
+++ b/recipes/calcalist.recipe
@@ -9,8 +9,7 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
     title = u'Calcalist'
     language = 'he'
     __author__ = 'marbs'
-    extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'  # noqa
-    simultaneous_downloads = 5
+    extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'  # noqa: E501
     remove_javascript = True
     timefmt = '[%a, %d %b, %Y]'
     oldest_article = 1
@@ -23,34 +22,33 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
         dict(name='div', attrs={'class': 'ArticleBodyComponent'}),
     ]
     remove_tags = [dict(name='p', attrs={'text': ['&nbsp;']})]
-    max_articles_per_feed = 100
     preprocess_regexps = [
         (re.compile(r'', re.DOTALL | re.IGNORECASE), lambda match: '')
     ]
 
     feeds = [
-        (u" דף הבית", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml"),
-        (u" 24/7", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml"),
-        (u" באזז", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml"),
-        (u" משפט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml"),
-        (u" רכב", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml"),
-        (u" אחריות וסביבה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml"),
-        (u" דעות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml"),
-        (u" תיירות ותעופה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml"),
-        (u" קריירה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml"),
-        (u" אחד העם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml"),
-        (u" המלצות ואזהרות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml"),
-        (u" הייטק והון סיכון", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml"),
-        (u" חדשות טכנולוגיה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml"),
-        (u" תקשורת", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml"),
-        (u" אינטרנט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml"),
-        (u" מכשירים וגאדג'טים", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml"),
-        (u" המדריך", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml"),
-        (u" אפליקציות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml"),
-        (u" Play", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml"),
-        (u" הכסף", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml"),
-        (u" עולם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml"),
-        (u" פרסום ושיווק", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml"),
-        (u" פנאי", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml"),
-        (u" עסקי ספורט", u"http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml")
+        (u' דף הבית', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml'),
+        (u' 24/7', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml'),
+        (u' באזז', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml'),
+        (u' משפט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml'),
+        (u' רכב', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml'),
+        (u' אחריות וסביבה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml'),
+        (u' דעות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml'),
+        (u' תיירות ותעופה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml'),
+        (u' קריירה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml'),
+        (u' אחד העם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml'),
+        (u' המלצות ואזהרות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml'),
+        (u' הייטק והון סיכון', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml'),
+        (u' חדשות טכנולוגיה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml'),
+        (u' תקשורת', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml'),
+        (u' אינטרנט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml'),
+        (u" מכשירים וגאדג'טים", u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml'),
+        (u' המדריך', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml'),
+        (u' אפליקציות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml'),
+        (u' Play', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml'),
+        (u' הכסף', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml'),
+        (u' עולם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml'),
+        (u' פרסום ושיווק', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml'),
+        (u' פנאי', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml'),
+        (u' עסקי ספורט', u'http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml')
     ]
diff --git a/recipes/calgary_herald.recipe b/recipes/calgary_herald.recipe
index 5ab1c722fa..eef67df749 100644
--- a/recipes/calgary_herald.recipe
+++ b/recipes/calgary_herald.recipe
@@ -60,20 +60,20 @@ class CanWestPaper(BasicNewsRecipe):
     ]
 
     # un-comment the following six lines for the Vancouver Province
-    # title = u'Vancouver Province'
-    # url_prefix = 'http://www.theprovince.com'
-    # description = u'News from Vancouver, BC'
-    # std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
-    # logo_url = 'vplogo.jpg'
-    # fp_tag = 'CAN_TP'
+    # # title = u'Vancouver Province'
+    # # url_prefix = 'http://www.theprovince.com'
+    # # description = u'News from Vancouver, BC'
+    # # std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
+    # # logo_url = 'vplogo.jpg'
+    # # fp_tag = 'CAN_TP'
 
     # un-comment the following six lines for the Vancouver Sun
-    # title = u'Vancouver Sun'
-    # url_prefix = 'http://www.vancouversun.com'
-    # description = u'News from Vancouver, BC'
-    # std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
-    # logo_url = 'vslogo.jpg'
-    # fp_tag = 'CAN_VS'
+    # # title = u'Vancouver Sun'
+    # # url_prefix = 'http://www.vancouversun.com'
+    # # description = u'News from Vancouver, BC'
+    # # std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
+    # # logo_url = 'vslogo.jpg'
+    # # fp_tag = 'CAN_VS'
 
     # un-comment the following six lines for the Calgary Herald
     title = u'Calgary Herald'
@@ -90,7 +90,7 @@ class CanWestPaper(BasicNewsRecipe):
     # # std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
     # # logo_url = 'ejlogo.jpg'
     # # fp_tag = 'CAN_EJ'
-    #
+
     # un-comment the following six lines for the Ottawa Citizen
     # # title = u'Ottawa Citizen'
     # # url_prefix = 'http://www.ottawacitizen.com'
@@ -98,7 +98,7 @@ class CanWestPaper(BasicNewsRecipe):
     # # std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
     # # logo_url = 'oclogo.jpg'
     # # fp_tag = 'CAN_OC'
-    #
+
     # un-comment the following six lines for the Montreal Gazette
     # # title = u'Montreal Gazette'
     # # url_prefix = 'http://www.montrealgazette.com'
@@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
     .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
     #photocredit { font-size: xx-small; font-weight: normal; }'''
 
-    keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
+    keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
 
     remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
                    dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
                        name='div', attrs={'class': 'copyright'}),
                    dict(name='div', attrs={'class': 'rule_grey_solid'}),
                    dict(name='div', attrs={'id': 'soundoff'}),
-                   dict(name='div', attrs={'id': re.compile('flyer')}),
+                   dict(name='div', attrs={'id': re.compile(r'flyer')}),
                    dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
 
     def get_cover_url(self):
@@ -154,8 +154,7 @@ class CanWestPaper(BasicNewsRecipe):
         except:
             while daysback < 7:
                 cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg' + \
-                    str((date.today() - timedelta(days=daysback)).day) + \
-                    '/lg/' + self.fp_tag + '.jpg'
+                    str((date.today() - timedelta(days=daysback)).day) + '/lg/' + self.fp_tag + '.jpg'
                 br = BasicNewsRecipe.get_browser(self)
                 try:
                     br.open(cover)
@@ -164,24 +163,24 @@ class CanWestPaper(BasicNewsRecipe):
                 continue
             break
         if daysback == 7:
-            self.log("\nCover unavailable")
+            self.log('\nCover unavailable')
             cover = None
         return cover
 
     def fixChars(self, string):
         # Replace lsquo (\x91)
-        fixed = re.sub("\x91", "‘", string)
+        fixed = string.replace('\x91', '‘')
         # Replace rsquo (\x92)
-        fixed = re.sub("\x92", "’", fixed)
+        fixed = fixed.replace('\x92', '’')
         # Replace ldquo (\x93)
-        fixed = re.sub("\x93", "“", fixed)
+        fixed = fixed.replace('\x93', '“')
         # Replace rdquo (\x94)
-        fixed = re.sub("\x94", "”", fixed)
+        fixed = fixed.replace('\x94', '”')
        # Replace ndash (\x96)
-        fixed = re.sub("\x96", "–", fixed)
+        fixed = fixed.replace('\x96', '–')
        # Replace mdash (\x97)
-        fixed = re.sub("\x97", "—", fixed)
-        fixed = re.sub("&#x2019;", "’", fixed)
+        fixed = fixed.replace('\x97', '—')
+        fixed = fixed.replace('&#x2019;', '’')
         return fixed
 
     def massageNCXText(self, description):
@@ -214,7 +213,7 @@ class CanWestPaper(BasicNewsRecipe):
         divtags = soup.findAll('div', attrs={'id': ''})
         if divtags:
             for div in divtags:
-                del(div['id'])
+                del div['id']
 
         pgall = soup.find('div', attrs={'id': 'storyphoto'})
         if pgall is not None:  # photo gallery perhaps
@@ -262,10 +261,10 @@ class CanWestPaper(BasicNewsRecipe):
             if url.startswith('/'):
                 url = self.url_prefix + url
             if not url.startswith(self.url_prefix):
-                print("Rejected " + url)
+                print('Rejected ' + url)
                 return
             if url in self.url_list:
-                print("Rejected dup " + url)
+                print('Rejected dup ' + url)
                 return
             self.url_list.append(url)
             title = self.tag_to_string(atag, False)
@@ -277,8 +276,8 @@ class CanWestPaper(BasicNewsRecipe):
                 return
             dtag = adiv.find('div', 'content')
             description = ''
-            print("URL " + url)
-            print("TITLE " + title)
+            print('URL ' + url)
+            print('TITLE ' + title)
             if dtag is not None:
                 stag = dtag.span
                 if stag is not None:
@@ -286,18 +285,18 @@ class CanWestPaper(BasicNewsRecipe):
                         description = self.tag_to_string(stag, False)
                 else:
                     description = self.tag_to_string(dtag, False)
-            print("DESCRIPTION: " + description)
+            print('DESCRIPTION: ' + description)
             if key not in articles:
                 articles[key] = []
             articles[key].append(dict(
                 title=title, url=url, date='', description=description, author='', content=''))
 
         def parse_web_index(key, keyurl):
-            print("Section: " + key + ': ' + self.url_prefix + keyurl)
+            print('Section: ' + key + ': ' + self.url_prefix + keyurl)
             try:
                 soup = self.index_to_soup(self.url_prefix + keyurl)
             except:
-                print("Section: " + key + ' NOT FOUND')
+                print('Section: ' + key + ' NOT FOUND')
                 return
             ans.append(key)
             mainsoup = soup.find('div', 'bodywrapper')
@@ -309,7 +308,7 @@ class CanWestPaper(BasicNewsRecipe):
             for wdiv in mainsoup.findAll(attrs={'class': ['headline', 'featurecontent']}):
                 handle_article(wdiv, key)
 
-        for (k, url) in self.postmedia_index_pages:
+        for k,url in self.postmedia_index_pages:
             parse_web_index(k, url)
         ans = [(key, articles[key]) for key in ans if key in articles]
         return ans
diff --git a/recipes/canardpc.recipe b/recipes/canardpc.recipe
index be8a3267b4..839165dd63 100644
--- a/recipes/canardpc.recipe
+++ b/recipes/canardpc.recipe
@@ -4,7 +4,6 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1271446252(BasicNewsRecipe):
     title = u'CanardPC'
     oldest_article = 7
-    max_articles_per_feed = 100
     language = 'fr'
     __author__ = 'zorgluf'
     max_articles_per_feed = 25
diff --git a/recipes/capital_gr.recipe b/recipes/capital_gr.recipe
index 2e716b79b0..848a24f41b 100644
--- a/recipes/capital_gr.recipe
+++ b/recipes/capital_gr.recipe
@@ -17,7 +17,7 @@ class Capital(BasicNewsRecipe):
     keep_only_tags = [
         dict(name='h1'),
         dict(name='p'),
-        dict(name='span', attrs={'id': ["textbody"]})
+        dict(name='span', attrs={'id': ['textbody']})
     ]
     # 3 posts seemed to have utf8 encoding
@@ -36,6 +36,6 @@ class Capital(BasicNewsRecipe):
          'http://www.capital.gr/articles/articlesrss.asp?catid=4'),
         (u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A3\u0399\u03A9\u03A0\u0397\u03A4\u0397\u03A1\u0399\u039F',
          'http://www.capital.gr/articles/articlesrss.asp?catid=6'),
-        (u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A0\u0399\u03A3\u03A9 \u0391\u03A0\u039F \u03A4\u0399\u03A3 \u0393\u03A1\u0391\u039C\u039C\u0395\u03A3',  # noqa
+        (u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A0\u0399\u03A3\u03A9 \u0391\u03A0\u039F \u03A4\u0399\u03A3 \u0393\u03A1\u0391\u039C\u039C\u0395\u03A3',  # noqa: E501
          'http://www.capital.gr/articles/articlesrss.asp?catid=8'),
     ]
diff --git a/recipes/caravan_magazine.recipe b/recipes/caravan_magazine.recipe
index 115424802d..938fc7b393 100644
--- a/recipes/caravan_magazine.recipe
+++ b/recipes/caravan_magazine.recipe
@@ -15,6 +15,7 @@ def absurl(x):
         x = 'https://caravanmagazine.in' + x
     return x
 
+
 def safe_dict(data, *names):
     ans = data
     for x in names:
@@ -49,6 +50,7 @@ def parse_body(x):
             yield from parse_body(p)
     yield ''
 
+
 def parse_p(p):
     if p.get('type', '') == 'text':
         if 'marks' in p:
@@ -96,7 +98,7 @@ class CaravanMagazine(BasicNewsRecipe):
         br = BasicNewsRecipe.get_browser(self, *args, **kw)
         if not self.username or not self.password:
             return br
-        data = json.dumps({"0":{"json":{"email":self.username,"password":self.password}}})
+        data = json.dumps({'0':{'json':{'email':self.username,'password':self.password}}})
         if not isinstance(data, bytes):
             data = data.encode('utf-8')
         rq = Request(
@@ -138,7 +140,7 @@ class CaravanMagazine(BasicNewsRecipe):
         d = self.recipe_specific_options.get('date')
         if d and isinstance(d, str):
             x = d.split('-')
-            inp = json.dumps({"0":{"json":{"month":int(x[0]),"year":int(x[1])}}})
+            inp = json.dumps({'0':{'json':{'month':int(x[0]),'year':int(x[1])}}})
             api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='')
             raw = json.loads(self.index_to_soup(api, raw=True))
@@ -174,7 +176,7 @@ class CaravanMagazine(BasicNewsRecipe):
 
     def print_version(self, url):
         slug = urlparse(url).path
-        inp = json.dumps({"0":{"json":{"slug":slug}}})
+        inp = json.dumps({'0':{'json':{'slug':slug}}})
         return 'https://api.caravanmagazine.in/api/trpc/articles.getFromCache?batch=1&input=' + quote(inp, safe='')
 
     def preprocess_raw_html(self, raw, url):
@@ -211,6 +213,6 @@ class CaravanMagazine(BasicNewsRecipe):
             for x in art_cont['premiumContent']:
                 premium_cont += '\n' + ''.join(parse_body(x))
 
-        return '' \
-            + cat + title + desc + auth + lede + free_cont + premium_cont + \
-            ''
+        return (''
+                + cat + title + desc + auth + lede + free_cont + premium_cont +
+                '')
diff --git a/recipes/cato.recipe b/recipes/cato.recipe
index c2d7332f17..234ad3edba 100644
--- a/recipes/cato.recipe
+++ b/recipes/cato.recipe
@@ -5,9 +5,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 class CATOInstitute(BasicNewsRecipe):
     title = u'The CATO Institute'
-    description = "The Cato Institute is a public policy research organization — a think tank — \
-dedicated to the principles of individual liberty, limited government, free markets and peace.\
- Its scholars and analysts conduct independent, nonpartisan research on a wide range of policy issues."
+    description = ('The Cato Institute is a public policy research organization — a think tank — '
+                   'dedicated to the principles of individual liberty, limited government, free markets and peace. '
+                   'Its scholars and analysts conduct independent, nonpartisan research on a wide range of policy issues.')
     __author__ = '_reader'
     __date__ = '05 July 2012'
     __version__ = '1.0'
diff --git a/recipes/cbn.recipe b/recipes/cbn.recipe
index 36a4f5a255..834f5d57ae 100644
--- a/recipes/cbn.recipe
+++ b/recipes/cbn.recipe
@@ -31,7 +31,6 @@ class CBN(BasicNewsRecipe):
     remove_javascript = True
     use_embedded_content = False
     no_stylesheets = True
-    language = 'en'
     encoding = 'iso-8859-1'
     conversion_options = {'linearize_tables': True}
diff --git a/recipes/cedar.recipe b/recipes/cedar.recipe
index f3625509f0..17f6199d09 100644
--- a/recipes/cedar.recipe
+++ b/recipes/cedar.recipe
@@ -6,7 +6,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Cedar(BasicNewsRecipe):
     title = u'\u041A\u0435\u0434\u0440'
-    description = u'\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u043E\u0435 \u043C\u0435\u0434\u0438\u0430 \u043E\u0431 \u043E\u043A\u0440\u0443\u0436\u0430\u044E\u0449\u0435\u0439 \u0441\u0440\u0435\u0434\u0435'  # noqa
+    description = u'\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u043E\u0435 \u043C\u0435\u0434\u0438\u0430 \u043E\u0431 \u043E\u043A\u0440\u0443\u0436\u0430\u044E\u0449\u0435\u0439 \u0441\u0440\u0435\u0434\u0435'  # noqa: E501
     __author__ = 'bugmen00t'
     publication_type = 'blog'
     oldest_article = 30
diff --git a/recipes/cherta.recipe b/recipes/cherta.recipe
index 8496a543c4..62ae6a18dc 100644
--- a/recipes/cherta.recipe
+++ b/recipes/cherta.recipe
@@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Cherta(BasicNewsRecipe):
     title = '\u0427\u0435\u0440\u0442\u0430'
     __author__ = 'bugmen00t'
-    description = ' \u0418\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u044B\u0435, \u0432\u0430\u0436\u043D\u044B\u0435 \u0438 \u0433\u043B\u0443\u0431\u043E\u043A\u0438\u0435 \u0442\u0435\u043A\u0441\u0442\u044B \u043F\u0440\u043E \u043D\u0430\u0441\u0438\u043B\u0438\u0435 \u0438 \u043D\u0435\u0440\u0430\u0432\u0435\u043D\u0441\u0442\u0432\u043E \u0432 \u0420\u043E\u0441\u0441\u0438\u0438.'  # noqa
+    description = ' \u0418\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u044B\u0435, \u0432\u0430\u0436\u043D\u044B\u0435 \u0438 \u0433\u043B\u0443\u0431\u043E\u043A\u0438\u0435 \u0442\u0435\u043A\u0441\u0442\u044B \u043F\u0440\u043E \u043D\u0430\u0441\u0438\u043B\u0438\u0435 \u0438 \u043D\u0435\u0440\u0430\u0432\u0435\u043D\u0441\u0442\u0432\u043E \u0432 \u0420\u043E\u0441\u0441\u0438\u0438.'  # noqa: E501
     publisher = 'cherta.media'
     category = 'blog'
     cover_url = u'https://cherta.media/wp-content/uploads/2022/01/cherta_snippet2.png'
@@ -22,7 +22,7 @@ class Cherta(BasicNewsRecipe):
 
     remove_tags_after = dict(name='div', attrs={'class':'single-page__footer-info'})
 
-    remove_tags = [
+    remove_tags = [
         dict(name='div', attrs={'class': 'single-content-link'}),
         dict(name='div', attrs={'class': 'single-page__footer-info_links clearfix'}),
         dict(name='div', attrs={'class': 'single-article-tags-wrapper'})
diff --git a/recipes/chicago_tribune.recipe b/recipes/chicago_tribune.recipe
index 70cf0958ee..66b9969af7 100644
--- a/recipes/chicago_tribune.recipe
+++ b/recipes/chicago_tribune.recipe
@@ -67,7 +67,6 @@ class ChicagoTribune(BasicNewsRecipe):
             feeds.append({'title': title, 'url': url})
         return [('Articles', feeds)]
 
-
     def preprocess_html(self, soup):
         for img in soup.findAll('img', attrs={'data-src': True}):
             img['src'] = img['data-src']
diff --git a/recipes/chr_mon.recipe b/recipes/chr_mon.recipe
index 650c699fad..c75fc5fd4c 100644
--- a/recipes/chr_mon.recipe
+++ b/recipes/chr_mon.recipe
@@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class CSMonitor(BasicNewsRecipe):
     title = 'The Christian Science Monitor - daily'
     __author__ = 'Darko Miletic'
-    description = 'The Christian Science Monitor is an international news organization that delivers thoughtful, global coverage via its website, weekly magazine, daily news briefing, and email newsletters.'  # noqa
+    description = 'The Christian Science Monitor is an international news organization that delivers thoughtful, global coverage via its website, weekly magazine, daily news briefing, and email newsletters.'  # noqa: E501
     publisher = 'The Christian Science Monitor'
     category = 'news, politics, USA'
     oldest_article = 2
@@ -24,7 +24,7 @@ class CSMonitor(BasicNewsRecipe):
     remove_empty_feeds = True
     publication_type = 'newspaper'
     masthead_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
-    extra_css = """
+    extra_css = '''
                 body{font-family: Arial,Tahoma,Verdana,Helvetica,sans-serif }
                 img{margin-bottom: 0.4em; display:block}
                 .head {font-family: Georgia,"Times New Roman",Times,serif}
@@ -32,19 +32,19 @@ class CSMonitor(BasicNewsRecipe):
                 .hide{display: none}
                 .sLoc{font-weight: bold}
                 ul{list-style-type: none}
-                """
+                '''
 
     conversion_options = {
         'comment': description, 'tags': category, 'publisher': publisher, 'language': language
     }
 
     remove_tags = [
-        dict(name=['meta', 'link', 'iframe', 'object', 'embed']), dict(attrs={'class': re.compile('(^|| )podStoryRel($|| )', re.DOTALL)}), dict(
+        dict(name=['meta', 'link', 'iframe', 'object', 'embed']), dict(attrs={'class': re.compile(r'(^|| )podStoryRel($|| )', re.DOTALL)}), dict(
             attrs={'class': ['bottom-rel', 'hide']}), dict(attrs={'id': ['pgallerycarousel_enlarge', 'pgallerycarousel_related']})
     ]
     keep_only_tags = [
         dict(name='h1', attrs={'class': 'head'}), dict(name='h2', attrs={'class': 'subhead'}), dict(attrs={'class': [
-            'sByline', 'thePhoto', 'ui-body-header']}), dict(attrs={'class': re.compile('(^|| )sBody($|| )', re.DOTALL)})
+            'sByline', 'thePhoto', 'ui-body-header']}), dict(attrs={'class': re.compile(r'(^|| )sBody($|| )', re.DOTALL)})
     ]
     remove_attributes = ['xmlns:fb']
 
@@ -74,11 +74,11 @@ class CSMonitor(BasicNewsRecipe):
                 nurl = 'http://www.csmonitor.com' + nexttag['href']
                 soup2 = self.index_to_soup(nurl)
                 texttag = soup2.find(
-                    attrs={'class': re.compile('(^|| )sBody($|| )', re.DOTALL)})
+                    attrs={'class': re.compile(r'(^|| )sBody($|| )', re.DOTALL)})
                 if texttag:
                     appendtag = soup.find(
-                        attrs={'class': re.compile('(^|| )sBody($|| )', re.DOTALL)})
-                    for citem in texttag.findAll(attrs={'class': [re.compile('(^|| )podStoryRel($|| )', re.DOTALL), 'bottom-rel', 'hide']}):
+                        attrs={'class': re.compile(r'(^|| )sBody($|| )', re.DOTALL)})
+                    for citem in texttag.findAll(attrs={'class': [re.compile(r'(^|| )podStoryRel($|| )', re.DOTALL), 'bottom-rel', 'hide']}):
                         citem.extract()
                     self.append_page(soup2)
                     texttag.extract()
diff --git a/recipes/christian_post.recipe b/recipes/christian_post.recipe
index e924984dac..9ab4a639d2 100644
--- a/recipes/christian_post.recipe
+++ b/recipes/christian_post.recipe
@@ -9,7 +9,6 @@ class ChristianPost(BasicNewsRecipe):
     __author__ = 'sexymax15'
     description = 'Homepage'
     language = 'en'
-
     no_stylesheets = True
     use_embedded_content = False
     oldest_article = 30
     max_articles_per_feed = 15
diff --git a/recipes/chronicle_higher_ed.recipe b/recipes/chronicle_higher_ed.recipe
index 619dbb287c..c61c54686b 100644
--- a/recipes/chronicle_higher_ed.recipe
+++ b/recipes/chronicle_higher_ed.recipe
@@ -39,7 +39,7 @@ class Chronicle(BasicNewsRecipe):
 
         # Go to the issue
         soup0 = self.index_to_soup('http://chronicle.com/section/Archives/39/')
         issue = soup0.find('ul', attrs={'class': 'feature-promo-list'}).li
-        issueurl = "http://chronicle.com" + issue.a['href']
+        issueurl = 'http://chronicle.com' + issue.a['href']
 
         # Find date
         dates = self.tag_to_string(issue.a).split(': ')[-1]
@@ -47,12 +47,12 @@ class Chronicle(BasicNewsRecipe):
 
         # Find cover
         cover = soup0.find('div', attrs={
-            'class': 'side-content'}).find(attrs={'src': re.compile("photos/biz/Current")})
+            'class': 'side-content'}).find(attrs={'src': re.compile(r'photos/biz/Current')})
         if cover is not None:
-            if "chronicle.com" in cover['src']:
+            if 'chronicle.com' in cover['src']:
                 self.cover_url = cover['src']
             else:
-                self.cover_url = "http://chronicle.com" + cover['src']
+                self.cover_url = 'http://chronicle.com' + cover['src']
         # Go to the main body
         soup = self.index_to_soup(issueurl)
         div = soup.find('div', attrs={'id': 'article-body'})
@@ -64,7 +64,7 @@ class Chronicle(BasicNewsRecipe):
             a = post.find('a', href=True)
             if a is not None:
                 title = self.tag_to_string(a)
-                url = "http://chronicle.com" + a['href'].strip()
+                url = 'http://chronicle.com' + a['href'].strip()
                 sectiontitle = post.findPrevious('h3')
                 if sectiontitle is None:
                     sectiontitle = post.findPrevious('h4')
@@ -77,7 +77,7 @@ class Chronicle(BasicNewsRecipe):
             if section_title not in feeds:
                 feeds[section_title] = []
             feeds[section_title] += articles
-        ans = [(key, val) for key, val in feeds.items()]
+        ans = list(feeds.items())
         return ans
 
     def preprocess_html(self, soup):
diff --git a/recipes/cicero.recipe b/recipes/cicero.recipe
index 4a4acd2507..61dc3e6b71 100644
--- a/recipes/cicero.recipe
+++ b/recipes/cicero.recipe
@@ -15,27 +15,27 @@ class BasicUserRecipe1316245412(BasicNewsRecipe):
     no_stylesheets = True
     auto_cleanup = False
-# remove_javascript = True
+    # remove_javascript = True
 
     remove_tags = [
-        dict(name='div', attrs={'id': ["header", "navigation", "skip-link",
-                                       "header-print", "header-print-url", "meta-toolbar", "footer"]}),
-        dict(name='div', attrs={'class': ["region region-sidebar-first column sidebar", "breadcrumb",
-                                          "breadcrumb-title", "meta", "comment-wrapper",
-                                          "field field-name-field-show-teaser-right field-type-list-boolean field-label-above",
-                                          "page-header",
-                                          "view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-1",
-                                          "pagination",
-                                          "view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-1",
view-id-letzte_videos view-display-id-default view-dom-id-1", - "view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-2", # 2011-09-23 - "view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-2", # 2011-09-23 + dict(name='div', attrs={'id': ['header', 'navigation', 'skip-link', + 'header-print', 'header-print-url', 'meta-toolbar', 'footer']}), + dict(name='div', attrs={'class': ['region region-sidebar-first column sidebar', 'breadcrumb', + 'breadcrumb-title', 'meta', 'comment-wrapper', + 'field field-name-field-show-teaser-right field-type-list-boolean field-label-above', + 'page-header', + 'view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-1', + 'pagination', + 'view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-1', + 'view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-2', # 2011-09-23 + 'view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-2', # 2011-09-23 ]}), - dict(name='div', attrs={'title': ["Dossier Auswahl"]}), - dict(name='h2', attrs={'class': ["title comment-form"]}), + dict(name='div', attrs={'title': ['Dossier Auswahl']}), + dict(name='h2', attrs={'class': ['title comment-form']}), dict(name='form', attrs={ - 'class': ["comment-form user-info-from-cookie"]}), + 'class': ['comment-form user-info-from-cookie']}), dict(name='table', attrs={ - 'class': ["mcx-social-horizontal", "page-header"]}), + 'class': ['mcx-social-horizontal', 'page-header']}), ] feeds = [ @@ -51,6 +51,6 @@ class BasicUserRecipe1316245412(BasicNewsRecipe): def print_version(self, url): return url + '?print' -# def get_cover_url(self): -# return 'http://www.cicero.de/sites/all/themes/cicero/logo.png' # need to -# find a good logo on their home page! + # def get_cover_url(self): + # return 'http://www.cicero.de/sites/all/themes/cicero/logo.png' # need to + # find a good logo on their home page! diff --git a/recipes/cincinnati_enquirer.recipe b/recipes/cincinnati_enquirer.recipe index 4e2409024c..2fcad1c635 100644 --- a/recipes/cincinnati_enquirer.recipe +++ b/recipes/cincinnati_enquirer.recipe @@ -34,7 +34,7 @@ class AdvancedUserRecipe1234144423(BasicNewsRecipe): dict(name='div', attrs={'class': ['padding', 'sidebar-photo', 'blog caitlin']})] remove_tags = [ - dict(name=['object', 'link', 'table', 'embed']), dict(name='div', attrs={'id': ["pluckcomments", "StoryChat"]}), dict( + dict(name=['object', 'link', 'table', 'embed']), dict(name='div', attrs={'id': ['pluckcomments', 'StoryChat']}), dict( name='div', attrs={'class': ['articleflex-container', ]}), dict(name='p', attrs={'class': ['posted', 'tags']}) ] diff --git a/recipes/ciperchile.recipe b/recipes/ciperchile.recipe index 2aaa2a761d..c503b218bc 100644 --- a/recipes/ciperchile.recipe +++ b/recipes/ciperchile.recipe @@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class CiperChile(BasicNewsRecipe): title = 'CIPER Chile' __author__ = 'Darko Miletic' - description = 'El Centro de Investigacion e Informacion Periodistica (CIPER) es una institucion independiente que desarrolla reportajes de investigacion de acuerdo a principios de maxima calidad e integridad profesional. 
+    description = 'El Centro de Investigacion e Informacion Periodistica (CIPER) es una institucion independiente que desarrolla reportajes de investigacion de acuerdo a principios de maxima calidad e integridad profesional. Para lograr dicho objetivo, los profesionales de CIPER incorporan a las tecnicas propias del reporteo el uso sistematico de las leyes chilenas que norman el libre acceso a la informacion, de manera que los documentos que se obtengan por esta via esten puestos a disposicion del publico sin restricciones.'  # noqa: E501
     publisher = 'CIPER'
     category = 'news, politics, Chile'
     oldest_article = 15
@@ -23,14 +23,14 @@ class CiperChile(BasicNewsRecipe):
     remove_empty_feeds = True
     publication_type = 'blog'
     masthead_url = 'http://ciperchile.cl/wp-content/themes/cipertheme/css/ui/ciper-logo.png'
-    extra_css = """
+    extra_css = '''
                 body{font-family: Arial,sans-serif}
                 .excerpt{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: 1.25em}
                 .author{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: small}
                 .date{font-family: Georgia,"Times New Roman",Times,serif; font-size: small; color: grey}
                 .epigrafe{font-size: small; color: grey}
                 img{margin-bottom: 0.4em; display:block}
-                """
+                '''
 
     conversion_options = {
         'comment': description, 'tags': category, 'publisher': publisher, 'language': language
diff --git a/recipes/clarin.recipe b/recipes/clarin.recipe
index 9cac8328ed..898a10a7f8 100644
--- a/recipes/clarin.recipe
+++ b/recipes/clarin.recipe
@@ -44,7 +44,7 @@ class Clarin(BasicNewsRecipe):
     # To get all the data (images)
     auto_cleanup = False
 
-    extra_css = """
+    extra_css = '''
         h1#title {
             line-height: 1em;
             margin: 0 0 .5em 0;
@@ -64,7 +64,7 @@ class Clarin(BasicNewsRecipe):
             font-size: .9em;
             margin-bottom: .5em;
         }
-    """
+    '''
 
     conversion_options = {
         'comment': description, 'tags': category, 'publisher': publisher, 'language': language
@@ -85,11 +85,11 @@ class Clarin(BasicNewsRecipe):
             self.oldest_article = float(d)
 
     keep_only_tags = [
-        dict(name='p' , attrs={'class' : 'volanta'}),
-        dict(name='h1' , attrs={'id': 'title'}),
-        dict(name='div', attrs={'class' : 'bajada'}),
-        dict(name='div', attrs={'id' : 'galeria-trigger'}),
-        dict(name='div', attrs={'class' : 'body-nota'})
+        dict(name='p', attrs={'class': 'volanta'}),
+        dict(name='h1', attrs={'id': 'title'}),
+        dict(name='div', attrs={'class': 'bajada'}),
+        dict(name='div', attrs={'id': 'galeria-trigger'}),
+        dict(name='div', attrs={'class': 'body-nota'})
     ]
 
@@ -138,7 +138,7 @@ class Clarin(BasicNewsRecipe):
             'ingresar_ingresar_email_paseInputComponent': self.username,
             'ingresar_ingresar_palabraClave_paseInputComponent': self.password,
             'ingresar_ingresar_ingresar_paseButton': 'Ingresar',
-            'javax.faces.ViewState': 'e1s1'  # noqa
+            'javax.faces.ViewState': 'e1s1'
         })
         br.open(self.LOGIN, data)
         return br
diff --git a/recipes/cm_journal.recipe b/recipes/cm_journal.recipe
index 31df657e7b..704b35bda8 100644
--- a/recipes/cm_journal.recipe
+++ b/recipes/cm_journal.recipe
@@ -12,7 +12,7 @@ class CMJornal_pt(BasicNewsRecipe):
     encoding = 'utf-8'
     use_embedded_content = False
     language = 'pt'
-    extra_css = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} '  # noqa
5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} ' # noqa + extra_css = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} ' # noqa: E501 conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/cnetjapan.recipe b/recipes/cnetjapan.recipe index e1e4d79827..6dbcb927e5 100644 --- a/recipes/cnetjapan.recipe +++ b/recipes/cnetjapan.recipe @@ -25,16 +25,16 @@ class CNetJapan(BasicNewsRecipe): lambda match: ''), ] - remove_tags_before = dict(id="contents_l") + remove_tags_before = dict(id='contents_l') remove_tags = [ - {'class': "social_bkm_share"}, - {'class': "social_bkm_print"}, - {'class': "block20 clearfix"}, - dict(name="div", attrs={'id': 'bookreview'}), - {'class': "tag_left_ttl"}, - {'class': "tag_right"} + {'class': 'social_bkm_share'}, + {'class': 'social_bkm_print'}, + {'class': 'block20 clearfix'}, + dict(name='div', attrs={'id': 'bookreview'}), + {'class': 'tag_left_ttl'}, + {'class': 'tag_right'} ] - remove_tags_after = {'class': "block20"} + remove_tags_after = {'class': 'block20'} def parse_feeds(self): diff --git a/recipes/cnetjapan_digital.recipe b/recipes/cnetjapan_digital.recipe index 9cb2a148b4..db10032de9 100644 --- a/recipes/cnetjapan_digital.recipe +++ b/recipes/cnetjapan_digital.recipe @@ -25,16 +25,16 @@ class CNetJapanDigital(BasicNewsRecipe): lambda match: ''), ] - remove_tags_before = dict(id="contents_l") + remove_tags_before = dict(id='contents_l') remove_tags = [ - {'class': "social_bkm_share"}, - {'class': "social_bkm_print"}, - {'class': "block20 clearfix"}, - dict(name="div", attrs={'id': 'bookreview'}), - {'class': "tag_left_ttl"}, - {'class': "tag_right"} + {'class': 'social_bkm_share'}, + {'class': 'social_bkm_print'}, + {'class': 'block20 clearfix'}, + dict(name='div', attrs={'id': 'bookreview'}), + {'class': 'tag_left_ttl'}, + {'class': 'tag_right'} ] - remove_tags_after = {'class': "block20"} + remove_tags_after = {'class': 'block20'} def parse_feeds(self): diff --git a/recipes/cnetjapan_release.recipe b/recipes/cnetjapan_release.recipe index 4b85d24b9b..1cf29aef02 100644 --- a/recipes/cnetjapan_release.recipe +++ b/recipes/cnetjapan_release.recipe @@ -25,15 +25,15 @@ class CNetJapanRelease(BasicNewsRecipe): lambda match: ''), ] - remove_tags_before = dict(id="contents_l") + remove_tags_before = dict(id='contents_l') remove_tags = [ - {'class': "social_bkm_share"}, - {'class': "social_bkm_print"}, - {'class': "block20 clearfix"}, - dict(name="div", attrs={'id': 'bookreview'}), - {'class': "tag_left_ttl"} + {'class': 'social_bkm_share'}, + {'class': 'social_bkm_print'}, + {'class': 'block20 clearfix'}, + dict(name='div', attrs={'id': 'bookreview'}), + {'class': 'tag_left_ttl'} ] - remove_tags_after = {'class': "block20"} + remove_tags_after = {'class': 'block20'} def parse_feeds(self): diff --git a/recipes/cnetnews.recipe b/recipes/cnetnews.recipe index a0f9607d12..a98034ca21 100644 --- a/recipes/cnetnews.recipe +++ b/recipes/cnetnews.recipe @@ -27,7 +27,7 @@ def classes(classes): class CnetNews(BasicNewsRecipe): title = 'CNET News' __author__ = 'Kovid Goyal' - description = 'Tech news and business reports by CNET News. Focused on information technology, core topics include computers, hardware, software, networking, and Internet media.' # noqa + description = 'Tech news and business reports by CNET News. 
Focused on information technology, core topics include computers, hardware, software, networking, and Internet media.' # noqa: E501 publisher = 'CNET' category = 'news, IT, USA' encoding = 'utf-8' @@ -56,7 +56,7 @@ class CnetNews(BasicNewsRecipe): keep_only_tags = [ dict(name='h1'), dict(section='author'), - dict(id=["article-body", 'cnetReview']), + dict(id=['article-body', 'cnetReview']), dict(attrs={'class': 'deal-content'}), ] diff --git a/recipes/cnn.recipe b/recipes/cnn.recipe index 9089c9d2bb..b8a80131cc 100644 --- a/recipes/cnn.recipe +++ b/recipes/cnn.recipe @@ -72,7 +72,7 @@ class CNN(BasicNewsRecipe): try: br.open(masthead) except: - self.log("\nCover unavailable") + self.log('\nCover unavailable') masthead = None return masthead diff --git a/recipes/coda.recipe b/recipes/coda.recipe index e1538704b6..2e2c80ab63 100644 --- a/recipes/coda.recipe +++ b/recipes/coda.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Coda(BasicNewsRecipe): title = 'Coda' __author__ = 'bugmen00t' - description = 'Coda Story reports on major currents shaping our world from disinformation to authoritarian technologies to the war on science. Coda stays on these stories to reveal why they matter, how they are connected and where they are heading next.' # noqa + description = 'Coda Story reports on major currents shaping our world from disinformation to authoritarian technologies to the war on science. Coda stays on these stories to reveal why they matter, how they are connected and where they are heading next.' # noqa: E501 publisher = 'Natalia Antelava & Ilan Greenberg' category = 'blog' cover_url = u'https://www.codastory.com/wp-content/uploads/2021/05/AT_thumbnail_512x512.png' diff --git a/recipes/coda_ru.recipe b/recipes/coda_ru.recipe index 6d65dd6e32..45147ceae6 100644 --- a/recipes/coda_ru.recipe +++ b/recipes/coda_ru.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Coda(BasicNewsRecipe): title = 'Coda \u043D\u0430 \u0440\u0443\u0441\u0441\u043A\u043E\u043C' __author__ = 'bugmen00t' - description = 'Coda - \u043C\u0435\u0434\u0438\u0430, \u043A\u043E\u0442\u043E\u0440\u043E\u0435 \u0432\u044B\u044F\u0432\u043B\u044F\u0435\u0442 \u0441\u0432\u044F\u0437\u0438 \u043C\u0435\u0436\u0434\u0443 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u043C\u0438 \u0438 \u043F\u0440\u043E\u0434\u043E\u043B\u0436\u0430\u0435\u0442 \u0441\u043B\u0435\u0434\u0438\u0442\u044C \u0437\u0430 \u0438\u0441\u0442\u043E\u0440\u0438\u044F\u043C\u0438 \u0434\u0430\u0436\u0435 \u043F\u043E\u0441\u043B\u0435 \u0442\u043E\u0433\u043E, \u043A\u0430\u043A \u043E\u043D\u0438 \u043E\u043D\u0438 \u043F\u0440\u043E\u043F\u0430\u043B\u0438 \u0438\u0437 \u043F\u043E\u0432\u0435\u0441\u0442\u043A\u0438. 
Coda \u043F\u043E\u0433\u0440\u0443\u0436\u0430\u0435\u0442\u0441\u044F \u043D\u0435 \u0442\u043E\u043B\u044C\u043A\u043E \u0432 \u0441\u0430\u043C\u0438 \u043A\u0440\u0438\u0437\u0438\u0441\u044B, \u043D\u043E \u0438 \u0432 \u043A\u043E\u043D\u0442\u0435\u043A\u0441\u0442, \u043A\u043E\u0442\u043E\u0440\u044B\u0439 \u0438\u0445 \u043E\u043A\u0440\u0443\u0436\u0430\u0435\u0442' # noqa + description = 'Coda - \u043C\u0435\u0434\u0438\u0430, \u043A\u043E\u0442\u043E\u0440\u043E\u0435 \u0432\u044B\u044F\u0432\u043B\u044F\u0435\u0442 \u0441\u0432\u044F\u0437\u0438 \u043C\u0435\u0436\u0434\u0443 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u043C\u0438 \u0438 \u043F\u0440\u043E\u0434\u043E\u043B\u0436\u0430\u0435\u0442 \u0441\u043B\u0435\u0434\u0438\u0442\u044C \u0437\u0430 \u0438\u0441\u0442\u043E\u0440\u0438\u044F\u043C\u0438 \u0434\u0430\u0436\u0435 \u043F\u043E\u0441\u043B\u0435 \u0442\u043E\u0433\u043E, \u043A\u0430\u043A \u043E\u043D\u0438 \u043E\u043D\u0438 \u043F\u0440\u043E\u043F\u0430\u043B\u0438 \u0438\u0437 \u043F\u043E\u0432\u0435\u0441\u0442\u043A\u0438. Coda \u043F\u043E\u0433\u0440\u0443\u0436\u0430\u0435\u0442\u0441\u044F \u043D\u0435 \u0442\u043E\u043B\u044C\u043A\u043E \u0432 \u0441\u0430\u043C\u0438 \u043A\u0440\u0438\u0437\u0438\u0441\u044B, \u043D\u043E \u0438 \u0432 \u043A\u043E\u043D\u0442\u0435\u043A\u0441\u0442, \u043A\u043E\u0442\u043E\u0440\u044B\u0439 \u0438\u0445 \u043E\u043A\u0440\u0443\u0436\u0430\u0435\u0442' # noqa: E501 publisher = 'Natalia Antelava & Ilan Greenberg' category = 'blog' cover_url = u'https://www.codastory.com/wp-content/uploads/2021/05/AT_thumbnail_512x512.png' @@ -22,7 +22,7 @@ class Coda(BasicNewsRecipe): remove_tags_after = dict(name='div', attrs={'class': 'article'}) - remove_tags = [ + remove_tags = [ dict(name='li', attrs={'class': 'material-meta__type'}), dict(name='div', attrs={'class': 'more'}) ] diff --git a/recipes/colta.recipe b/recipes/colta.recipe index ea86cda905..d2ce698c40 100644 --- a/recipes/colta.recipe +++ b/recipes/colta.recipe @@ -12,7 +12,6 @@ class Colta(BasicNewsRecipe): publisher = 'OpenSpace.ru' category = 'news' cover_url = u'http://www.colta.ru/assets/logo-afb684c3d35fc1f6f103f9fb638c8ec1.png' - language = 'ru' no_stylesheets = True remove_javascript = True diff --git a/recipes/common_dreams.recipe b/recipes/common_dreams.recipe index 37f1c9fc4d..f693153402 100644 --- a/recipes/common_dreams.recipe +++ b/recipes/common_dreams.recipe @@ -33,24 +33,24 @@ class CommonDreams(BasicNewsRecipe): remove_javascript = True # Specify extra CSS - overrides ALL other CSS (IE. Added last). 
- extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \ - .introduction, .first { font-weight: bold; } \ - .cross-head { font-weight: bold; font-size: 125%; } \ - .cap, .caption { display: block; font-size: 80%; font-style: italic; } \ - .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \ - .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \ - .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \ - font-size: 80%; font-style: italic; margin: 1px auto; } \ - .story-date, .published { font-size: 80%; } \ - table { width: 100%; } \ - td img { display: block; margin: 5px auto; } \ - ul { padding-top: 10px; } \ - ol { padding-top: 10px; } \ - li { padding-top: 5px; padding-bottom: 5px; } \ - h1 { font-size: 175%; font-weight: bold; } \ - h2 { font-size: 150%; font-weight: bold; } \ - h3 { font-size: 125%; font-weight: bold; } \ - h4, h5, h6 { font-size: 100%; font-weight: bold; }' + extra_css = '''body { font-family: verdana, helvetica, sans-serif; } + .introduction, .first { font-weight: bold; } + .cross-head { font-weight: bold; font-size: 125%; } + .cap, .caption { display: block; font-size: 80%; font-style: italic; } + .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } + .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, + .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; + font-size: 80%; font-style: italic; margin: 1px auto; } + .story-date, .published { font-size: 80%; } + table { width: 100%; } + td img { display: block; margin: 5px auto; } + ul { padding-top: 10px; } + ol { padding-top: 10px; } + li { padding-top: 5px; padding-bottom: 5px; } + h1 { font-size: 175%; font-weight: bold; } + h2 { font-size: 150%; font-weight: bold; } + h3 { font-size: 125%; font-weight: bold; } + h4, h5, h6 { font-size: 100%; font-weight: bold; }''' # Remove the line breaks and float left/right and picture width/height. 
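The comment closing the hunk above describes what the next rule set does; mechanically, each preprocess_regexps entry pairs a compiled regular expression with a callable, and BasicNewsRecipe applies every pair, in order, to the raw downloaded HTML before parsing it. A self-contained sketch of the mechanism, using a made-up pattern rather than this recipe's own:

    import re

    # each entry: (compiled pattern, callable that maps a match object
    # to its replacement text); here a run of <br> tags collapses to one
    preprocess_regexps = [
        (re.compile(r'(<br[^>]*>\s*)+', re.IGNORECASE), lambda m: '<br>'),
    ]

    raw_html = 'line one<br><br ><br/>line two'
    for pattern, repl in preprocess_regexps:
        raw_html = pattern.sub(repl, raw_html)
    # raw_html is now 'line one<br>line two'

Using a callable rather than a plain replacement string lets a rule compute its output from the match; the recipes in this patch all return a constant, which is the common case.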
preprocess_regexps = [(re.compile(r'', re.IGNORECASE), lambda m: ''), diff --git a/recipes/computer_weekly.recipe b/recipes/computer_weekly.recipe index ed63d407d0..7aad0dde7f 100644 --- a/recipes/computer_weekly.recipe +++ b/recipes/computer_weekly.recipe @@ -25,7 +25,7 @@ class ComputerWeekly(BasicNewsRecipe): ('Financial services IT news', 'https://www.computerweekly.com/rss/Financial-services-IT-news.xml'), ('Public sector IT news', 'https://www.computerweekly.com/rss/Public-sector-IT-news.xml'), ('Enterprise software', 'https://www.computerweekly.com/rss/Enterprise-software.xml'), - ('SME IT news' , 'https://www.computerweekly.com/rss/SME-IT-news.xml'), + ('SME IT news', 'https://www.computerweekly.com/rss/SME-IT-news.xml'), ('Datacenter and cloud computing', 'https://www.computerweekly.com/rss/Datacentre-and-cloud-computing.xml'), ('Storage', 'https://www.computerweekly.com/rss/Storage.xml'), ('Information Management', 'https://www.computerweekly.com/rss/Information-management.xml'), diff --git a/recipes/computerworld_dk.recipe b/recipes/computerworld_dk.recipe index d34ebac609..1102a09b66 100644 --- a/recipes/computerworld_dk.recipe +++ b/recipes/computerworld_dk.recipe @@ -51,4 +51,3 @@ class WwwComputerworld_dk(BasicNewsRecipe): ('IDG Kurser', 'http://job.idgkurser.dk/rss/'), ] - diff --git a/recipes/contretemps.recipe b/recipes/contretemps.recipe index fb72855ba0..8c96314279 100644 --- a/recipes/contretemps.recipe +++ b/recipes/contretemps.recipe @@ -36,9 +36,9 @@ class ContretempsRecipe(BasicNewsRecipe): return None def default_cover(self, cover_file): - """ + ''' Crée une couverture personnalisée pour Contretemps - """ + ''' from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data @@ -56,7 +56,7 @@ class ContretempsRecipe(BasicNewsRecipe): weekday = french_weekday[wkd] month = french_month[today.month] - date_str = f"{weekday} {today.day} {month} {today.year}" + date_str = f'{weekday} {today.day} {month} {today.year}' edition = today.strftime('Édition de %Hh%M') # Création de l'image de base (ratio ~1.6 pour format livre) diff --git a/recipes/contropiano.recipe b/recipes/contropiano.recipe index 26108dec66..31edae46b8 100644 --- a/recipes/contropiano.recipe +++ b/recipes/contropiano.recipe @@ -27,25 +27,18 @@ class AdvancedUserRecipe(BasicNewsRecipe): remove_tags_after = dict(name='p', attrs={'class': ['firma-redazione']}) feeds = [ - (u'Politica', - u'http://contropiano.org/news/politica-news/feed'), - (u'Internazionale', - u'http://contropiano.org/news/internazionale-news/feed'), - (u'Aggiornamenti in breve', u'http://contropiano.org/news/aggiornamenti-in-breve/feed'), - (u'Economia', - u'http://contropiano.org/news/news-economia/feed'), - (u'Ambiente', - u'http://contropiano.org/news/ambiente-news/feed'), - (u'Scienza', - u'http://contropiano.org/news/scienza-news/feed'), - (u'Cultura', - u'http://contropiano.org/news/cultura-news/feed'), + (u'Politica', u'http://contropiano.org/news/politica-news/feed'), + (u'Internazionale', u'http://contropiano.org/news/internazionale-news/feed'), + (u'Aggiornamenti in breve', u'http://contropiano.org/news/aggiornamenti-in-breve/feed'), + (u'Economia', u'http://contropiano.org/news/news-economia/feed'), + (u'Ambiente', u'http://contropiano.org/news/ambiente-news/feed'), + (u'Scienza', u'http://contropiano.org/news/scienza-news/feed'), + (u'Cultura', u'http://contropiano.org/news/cultura-news/feed'), (u'Locali', u'http://contropiano.org/regionali/feed'), 
(u'Lavoro', u'http://contropiano.org/news/lavoro-conflitto-news/feed'), - (u'Malapolizia', u'http://contropiano.org/news/malapolizia-news/feed'), + (u'Malapolizia', u'http://contropiano.org/news/malapolizia-news/feed'), (u'Interventi', u'http://contropiano.org/interventi/feed'), (u'Documenti', u'http://contropiano.org/documenti/feed'), (u'Vignette', u'http://contropiano.org/vignette/feed'), - (u'Altro', - u'http://contropiano.org/altro/feed') + (u'Altro', u'http://contropiano.org/altro/feed') ] diff --git a/recipes/cosmos.recipe b/recipes/cosmos.recipe index b973c027bd..0100149ed0 100644 --- a/recipes/cosmos.recipe +++ b/recipes/cosmos.recipe @@ -5,10 +5,10 @@ from calibre.web.feeds.news import BasicNewsRecipe class CosmosMagazine(BasicNewsRecipe): - title = "Cosmos Magazine" + title = 'Cosmos Magazine' description = ( - "Cosmos is a quarterly science magazine with 4 editions a year (Mar, Jun, Sep, Dec)." - "It is produced by The Royal Institution of Australia Inc (RiAus)." + 'Cosmos is a quarterly science magazine with 4 editions a year (Mar, Jun, Sep, Dec). ' + 'It is produced by The Royal Institution of Australia Inc (RiAus).' ) language = 'en_AU' __author__ = 'yodha8' diff --git a/recipes/courrierinternational.recipe b/recipes/courrierinternational.recipe index 5aa05d55dc..043952ceaa 100644 --- a/recipes/courrierinternational.recipe +++ b/recipes/courrierinternational.recipe @@ -2,8 +2,8 @@ # vim:fileencoding=utf-8 __license__ = 'GPL v3' -__copyright__ = '2009, Mathieu Godlewski \ -2015, Rémi Vanicat , ' + '2015, Rémi Vanicat '), (re.compile(r'(]*>[ \t\r\n]*)*', re.DOTALL | re.IGNORECASE), lambda match: ''), - (re.compile(u'(]*>[ \t\r\n]*)*(\u25B6|\u25CF|\u261E|\u24D2|\(c\))*\[[^\]]*(\u24D2|\(c\)|\uAE30\uC0AC|\uC778\uAE30[^\]]*\uB274\uC2A4)[^\]]*\].*
', re.DOTALL | re.IGNORECASE), # noqa + (re.compile(u'(]*>[ \t\r\n]*)*(\u25B6|\u25CF|\u261E|\u24D2|\\(c\\))*\\[[^\\]]*(\u24D2|\\(c\\)|\uAE30\uC0AC|\uC778\uAE30[^\\]]*\uB274\uC2A4)[^\\]]*\\].*', re.DOTALL | re.IGNORECASE), # noqa: E501, RUF039 lambda match: ''), ] diff --git a/recipes/de_standaard.recipe b/recipes/de_standaard.recipe index 4bb2222672..e374a22f44 100644 --- a/recipes/de_standaard.recipe +++ b/recipes/de_standaard.recipe @@ -71,10 +71,10 @@ class AdvancedUserRecipe1467571059(BasicNewsRecipe): remove_tags = [ dict(name=['embed', 'object']), dict(name='div', attrs={'class':['note NotePortrait', 'note']}), - dict(name='ul', attrs={'class':re.compile('article__share')}), + dict(name='ul', attrs={'class':re.compile(r'article__share')}), dict(name='div', attrs={'class':'slideshow__controls'}), dict(name='a', attrs={'role':'button'}), - dict(name='figure', attrs={'class':re.compile('video')}) + dict(name='figure', attrs={'class':re.compile(r'video')}) ] remove_attributes = ['width', 'height'] diff --git a/recipes/debunkingdenialism.recipe b/recipes/debunkingdenialism.recipe index 71fbd69a53..1d190974c3 100644 --- a/recipes/debunkingdenialism.recipe +++ b/recipes/debunkingdenialism.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Bellingcat(BasicNewsRecipe): title = 'Debunking Denialism' __author__ = 'bugmen00t' - description = 'Debunking Denialism is a website dedicated to the refutation of pseudoscience and denialism by applying scientific skepticism and defending evidence-based science. Fighting pseudoscience and quackery with reason and evidence.' # noqa + description = 'Debunking Denialism is a website dedicated to the refutation of pseudoscience and denialism by applying scientific skepticism and defending evidence-based science. Fighting pseudoscience and quackery with reason and evidence.' 
# noqa: E501 publisher = 'Debunking Denialism' category = 'blog' cover_url = u'https://i0.wp.com/debunkingdenialism.com/wp-content/uploads/2017/06/cropped-newestblavatar.jpg' diff --git a/recipes/deccan_herald.recipe b/recipes/deccan_herald.recipe index 81525ea8fa..7070db9904 100644 --- a/recipes/deccan_herald.recipe +++ b/recipes/deccan_herald.recipe @@ -5,6 +5,7 @@ def absurl(url): if url.startswith('/'): return 'https://www.deccanherald.com' + url + class herald(BasicNewsRecipe): title = 'Deccan Herald' __author__ = 'unkn0wn' diff --git a/recipes/degentenaar.recipe b/recipes/degentenaar.recipe index 13f93f778a..93ff41ae08 100644 --- a/recipes/degentenaar.recipe +++ b/recipes/degentenaar.recipe @@ -36,9 +36,8 @@ class DeGentenaarOnline(BasicNewsRecipe): '--comment', description, '--category', category, '--publisher', publisher ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + \ - category + \ - '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' + html2epub_options = ('publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "') keep_only_tags = [dict(name='span', attrs={ 'id': ['lblArticleTitle', 'lblArticleIntroduction', 'lblArticleMainText']})] @@ -63,7 +62,7 @@ class DeGentenaarOnline(BasicNewsRecipe): return url.replace('/Detail.aspx?articleid', '/PrintArticle.aspx?ArticleID') def get_article_url(self, article): - return article.get('guid', None) + return article.get('guid', None) def preprocess_html(self, soup): del soup.body['onload'] @@ -77,9 +76,9 @@ class DeGentenaarOnline(BasicNewsRecipe): soup.html['lang'] = self.lang soup.html['dir'] = self.direction mlang = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Language"), ("content", self.lang)]) + ('http-equiv', 'Content-Language'), ('content', self.lang)]) mcharset = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) + ('http-equiv', 'Content-Type'), ('content', 'text/html; charset=utf-8')]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) return soup diff --git a/recipes/democracy_journal.recipe b/recipes/democracy_journal.recipe index 96c80c946c..965947d0df 100644 --- a/recipes/democracy_journal.recipe +++ b/recipes/democracy_journal.recipe @@ -16,8 +16,8 @@ class AdvancedUserRecipe1361743898(BasicNewsRecipe): def parse_index(self): articles = [] feeds = [] - soup = self.index_to_soup("http://www.democracyjournal.org") - for x in soup.findAll(href=re.compile(r"http://www\.democracyjournal\.org/\d*/.*php$")): + soup = self.index_to_soup('http://www.democracyjournal.org') + for x in soup.findAll(href=re.compile(r'http://www\.democracyjournal\.org/\d*/.*php$')): url = x.get('href') title = self.tag_to_string(x) articles.append({'title': title, 'url': url, diff --git a/recipes/democracy_now.recipe b/recipes/democracy_now.recipe index cfd9900c25..b6df72b0bc 100644 --- a/recipes/democracy_now.recipe +++ b/recipes/democracy_now.recipe @@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class DemocracyNowRecipe(BasicNewsRecipe): title = u'Democracy now!' __author__ = u'Antoine Beaupré' - description = 'A daily TV/radio news program, hosted by Amy Goodman and Juan Gonzalez, airing on over 1,100 stations, pioneering the largest community media collaboration in the United States.' 
# noqa + description = 'A daily TV/radio news program, hosted by Amy Goodman and Juan Gonzalez, airing on over 1,100 stations, pioneering the largest community media collaboration in the United States.' # noqa: E501 language = 'en' cover_url = 'http://www.democracynow.org/images/dn-logo-for-podcast.png' diff --git a/recipes/demorgen_be.recipe b/recipes/demorgen_be.recipe index 98e77f97c3..8d7eab72a4 100644 --- a/recipes/demorgen_be.recipe +++ b/recipes/demorgen_be.recipe @@ -1,8 +1,8 @@ #!/usr/bin/env python -""" +''' demorgen.be -""" +''' from calibre.web.feeds.news import BasicNewsRecipe @@ -13,7 +13,7 @@ class DeMorganBe(BasicNewsRecipe): description = 'News from Belgium in Dutch' oldest_article = 1 language = 'nl_BE' - encoding = "utf-8" + encoding = 'utf-8' max_articles_per_feed = 100 no_stylesheets = True remove_attributes = ['style', 'height', 'width'] @@ -23,10 +23,10 @@ class DeMorganBe(BasicNewsRecipe): masthead_url = 'https://www.demorgen.be/_next/static/media/demorgen_logo.dce579e2.svg' cover_url = 'https://usercontent.one/wp/www.insidejazz.be/wp-content/uploads/2018/11/pic0143.png' - extra_css = """ + extra_css = ''' time, [data-test-id:"article-label"], [data-test-id:"article-sublabel"], [[data-test-id:"article-author"]] { font-size:small; } [data-test-id:"header-intro"] { font-style: italic; } - """ + ''' keep_only_tags = [ dict(name='article', attrs={'id': 'article-content'}), diff --git a/recipes/den_of_geek.recipe b/recipes/den_of_geek.recipe index 518b1b23ca..4274a50737 100644 --- a/recipes/den_of_geek.recipe +++ b/recipes/den_of_geek.recipe @@ -5,9 +5,8 @@ class AdvancedUserRecipe1316944753(BasicNewsRecipe): title = u'Den of Geek' __author__ = 'Jaded' language = 'en' - description = 'From science fiction enthusiasts through to gaming fanatics, Den of Geek has become the one-stop UK website for people genuinely passionate about their entertainment media. Den of Geek covers popular culture but always with an edgy, UK centric slant that sets it apart from the crowd.' # noqa + description = 'From science fiction enthusiasts through to gaming fanatics, Den of Geek has become the one-stop UK website for people genuinely passionate about their entertainment media. Den of Geek covers popular culture but always with an edgy, UK centric slant that sets it apart from the crowd.' 
# noqa: E501 category = 'Movies, TV, Games, Comics, Cult, News, Reviews' - language = 'en' oldest_article = 7 max_articles_per_feed = 100 diff --git a/recipes/denik.cz.recipe b/recipes/denik.cz.recipe index 2af252fc9a..b856914ab8 100644 --- a/recipes/denik.cz.recipe +++ b/recipes/denik.cz.recipe @@ -23,8 +23,8 @@ class ceskyDenikRecipe(BasicNewsRecipe): cover_url = 'http://g.denik.cz/images/loga/denik.png' remove_javascript = True no_stylesheets = True - extra_css = """ - """ + extra_css = ''' + ''' remove_tags = [] keep_only_tags = [dict(name='div', attrs={'class': 'content'})] diff --git a/recipes/denik_referendum.recipe b/recipes/denik_referendum.recipe index 35fdd1248c..486524ab67 100644 --- a/recipes/denik_referendum.recipe +++ b/recipes/denik_referendum.recipe @@ -25,4 +25,4 @@ class denikReferendumRecipe(BasicNewsRecipe): remove_tags = [dict(name='div', attrs={'class': ['box boxLine', 'box noprint', 'box']}), dict(name='h3', attrs={'class': 'head alt'})] - keep_only_tags = [dict(name='div', attrs={'id': ['content']})] + keep_only_tags = [dict(name='div', attrs={'id': ['content']})] diff --git a/recipes/denikn.cz.recipe b/recipes/denikn.cz.recipe index 50c7b9fd3e..a857cb98a2 100644 --- a/recipes/denikn.cz.recipe +++ b/recipes/denikn.cz.recipe @@ -11,11 +11,11 @@ CZ_MONTHS = ['led', 'úno', 'bře', 'dub', 'kvě', 'čen', 'čec', 'srp', 'zář def cz_title_time(): - """ + ''' Helper function to return date with czech locale. Uses hardcoded lookup table of day and month names as strftime requires locale change that is not thread safe. - """ + ''' today = datetime.today() weekday = CZ_DAYS[today.weekday()] month = CZ_MONTHS[today.month-1] @@ -26,9 +26,9 @@ def cz_title_time(): class DenikNRecipe(BasicNewsRecipe): - """ + ''' Recipe for the RSS feed of https://denikn.cz/ - """ + ''' title = u'Deník N' __author__ = 'Robert Mihaly' diff --git a/recipes/deredactie.recipe b/recipes/deredactie.recipe index 1f6a1e5316..eb9dd676e6 100644 --- a/recipes/deredactie.recipe +++ b/recipes/deredactie.recipe @@ -31,13 +31,13 @@ class deredactie(BasicNewsRecipe): catnames = {} soup = self.index_to_soup( 'http://www.deredactie.be/cm/vrtnieuws.deutsch') - for elem in soup.findAll('li', attrs={'id': re.compile("^navItem[2-9]")}): + for elem in soup.findAll('li', attrs={'id': re.compile(r'^navItem[2-9]')}): a = elem.find('a', href=True) - m = re.search('(?<=/)[^/]*$', a['href']) + m = re.search(r'(?<=/)[^/]*$', a['href']) cat = str(m.group(0)) categories.append(cat) catnames[cat] = a['title'] - self.log("found cat %s\n" % catnames[cat]) + self.log('found cat %s\n' % catnames[cat]) feeds = [] @@ -45,7 +45,7 @@ class deredactie(BasicNewsRecipe): articles = [] soup = self.index_to_soup( 'http://www.deredactie.be/cm/vrtnieuws.deutsch/' + cat) - for a in soup.findAll('a', attrs={'href': re.compile("deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_")}): + for a in soup.findAll('a', attrs={'href': re.compile(r'deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_')}): skip_this_article = False url = a['href'].strip() if url.startswith('/'): @@ -55,12 +55,12 @@ class deredactie(BasicNewsRecipe): for article in articles: if article['url'] == url: skip_this_article = True - self.log("SKIPPING DUP %s" % url) + self.log('SKIPPING DUP %s' % url) break if skip_this_article: continue articles.append(myarticle) - self.log("Adding URL %s\n" % url) + self.log('Adding URL %s\n' % url) if articles: feeds.append((catnames[cat], articles)) return feeds diff --git a/recipes/descopera.recipe b/recipes/descopera.recipe index 3186b28ecb..949dc006a9 100644 --- 
a/recipes/descopera.recipe +++ b/recipes/descopera.recipe @@ -18,12 +18,10 @@ class Descopera(BasicNewsRecipe): oldest_article = 5 language = 'ro' max_articles_per_feed = 100 - no_stylesheets = True use_embedded_content = False category = 'Ziare,Reviste,Descopera' encoding = 'utf-8' cover_url = 'http://www.descopera.ro/images/header_images/logo.gif' - use_embedded_content = False no_stylesheets = True auto_cleanup = True diff --git a/recipes/desiring_god.recipe b/recipes/desiring_god.recipe index bd287217ef..579ce6d873 100644 --- a/recipes/desiring_god.recipe +++ b/recipes/desiring_god.recipe @@ -11,7 +11,6 @@ class DesiringGodEnglish(BasicNewsRecipe): cover_url = 'http://cdn0.desiringgod.org/images/layout/breadcrumbs_dg_mark.png' masthead_url = 'http://cdn0.desiringgod.org/images/layout/breadcrumbs_dg_mark.png' - language = 'en' oldest_article = 7 max_articles_per_feed = 50 auto_cleanup = True diff --git a/recipes/deutsche_welle_de.recipe b/recipes/deutsche_welle_de.recipe index 985b43e59c..b70b58fb9f 100644 --- a/recipes/deutsche_welle_de.recipe +++ b/recipes/deutsche_welle_de.recipe @@ -11,8 +11,6 @@ class DeutscheWelle(BasicNewsRecipe): max_articles_per_feed = 200 no_stylesheets = True remove_javascript = True - no_stylesheets = True - remove_javascript = True remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} remove_attributes = ['height', 'width', 'style'] diff --git a/recipes/deutsche_welle_es.recipe b/recipes/deutsche_welle_es.recipe index 5076f1425d..96639af75a 100644 --- a/recipes/deutsche_welle_es.recipe +++ b/recipes/deutsche_welle_es.recipe @@ -54,7 +54,6 @@ class DeutscheWelle_es(BasicNewsRecipe): ('Conozca Alemania', 'http://rss.dw-world.de/rdf/rss-sp-con') ] - def preprocess_html(self, soup): for img in soup.findAll('img', srcset=True): img['src'] = img['srcset'].split()[6] diff --git a/recipes/deutsche_welle_ru.recipe b/recipes/deutsche_welle_ru.recipe index 140722b3be..aca0b9cf2a 100644 --- a/recipes/deutsche_welle_ru.recipe +++ b/recipes/deutsche_welle_ru.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes class DeutscheWelle(BasicNewsRecipe): title = u'Deutsche Welle \u043D\u0430 \u0440\u0443\u0441\u0441\u043A\u043E\u043C' - description = u'\u0420\u0443\u0441\u0441\u043A\u0430\u044F \u0440\u0435\u0434\u0430\u043A\u0446\u0438\u044F Deutsche Welle: \u043D\u043E\u0432\u043E\u0441\u0442\u0438, \u0430\u043D\u0430\u043B\u0438\u0442\u0438\u043A\u0430, \u043A\u043E\u043C\u043C\u0435\u043D\u0442\u0430\u0440\u0438\u0438 \u0438 \u0440\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0438 \u0438\u0437 \u0413\u0435\u0440\u043C\u0430\u043D\u0438\u0438 \u0438 \u0415\u0432\u0440\u043E\u043F\u044B, \u043D\u0435\u043C\u0435\u0446\u043A\u0438\u0439 \u0438 \u0435\u0432\u0440\u043E\u043F\u0435\u0439\u0441\u043A\u0438\u0439 \u0432\u0437\u0433\u043B\u044F\u0434 \u043D\u0430 \u0441\u043E\u0431\u044B\u0442\u0438\u044F \u0432 \u0420\u043E\u0441\u0441\u0438\u0438 \u0438 \u043C\u0438\u0440\u0435, \u043F\u0440\u0430\u043A\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u0441\u043E\u0432\u0435\u0442\u044B \u0434\u043B\u044F \u0442\u0443\u0440\u0438\u0441\u0442\u043E\u0432 \u0438 \u0442\u0435\u0445, \u043A\u0442\u043E \u0436\u0435\u043B\u0430\u0435\u0442 \u0443\u0447\u0438\u0442\u044C\u0441\u044F \u0438\u043B\u0438 \u0440\u0430\u0431\u043E\u0442\u0430\u0442\u044C \u0432 \u0413\u0435\u0440\u043C\u0430\u043D\u0438\u0438 \u0438 \u0434\u0440\u0443\u0433\u0438\u0445 \u0441\u0442\u0440\u0430\u043D\u0430\u0445 
\u0415\u0432\u0440\u043E\u0441\u043E\u044E\u0437\u0430.' # noqa + description = u'\u0420\u0443\u0441\u0441\u043A\u0430\u044F \u0440\u0435\u0434\u0430\u043A\u0446\u0438\u044F Deutsche Welle: \u043D\u043E\u0432\u043E\u0441\u0442\u0438, \u0430\u043D\u0430\u043B\u0438\u0442\u0438\u043A\u0430, \u043A\u043E\u043C\u043C\u0435\u043D\u0442\u0430\u0440\u0438\u0438 \u0438 \u0440\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0438 \u0438\u0437 \u0413\u0435\u0440\u043C\u0430\u043D\u0438\u0438 \u0438 \u0415\u0432\u0440\u043E\u043F\u044B, \u043D\u0435\u043C\u0435\u0446\u043A\u0438\u0439 \u0438 \u0435\u0432\u0440\u043E\u043F\u0435\u0439\u0441\u043A\u0438\u0439 \u0432\u0437\u0433\u043B\u044F\u0434 \u043D\u0430 \u0441\u043E\u0431\u044B\u0442\u0438\u044F \u0432 \u0420\u043E\u0441\u0441\u0438\u0438 \u0438 \u043C\u0438\u0440\u0435, \u043F\u0440\u0430\u043A\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u0441\u043E\u0432\u0435\u0442\u044B \u0434\u043B\u044F \u0442\u0443\u0440\u0438\u0441\u0442\u043E\u0432 \u0438 \u0442\u0435\u0445, \u043A\u0442\u043E \u0436\u0435\u043B\u0430\u0435\u0442 \u0443\u0447\u0438\u0442\u044C\u0441\u044F \u0438\u043B\u0438 \u0440\u0430\u0431\u043E\u0442\u0430\u0442\u044C \u0432 \u0413\u0435\u0440\u043C\u0430\u043D\u0438\u0438 \u0438 \u0434\u0440\u0443\u0433\u0438\u0445 \u0441\u0442\u0440\u0430\u043D\u0430\u0445 \u0415\u0432\u0440\u043E\u0441\u043E\u044E\u0437\u0430.' # noqa: E501 __author__ = 'bugmen00t, unkn0wn' publication_type = 'newspaper' oldest_article = 2 diff --git a/recipes/deutsche_welle_sr.recipe b/recipes/deutsche_welle_sr.recipe index 134ea368fc..e0a052bcd2 100644 --- a/recipes/deutsche_welle_sr.recipe +++ b/recipes/deutsche_welle_sr.recipe @@ -59,4 +59,3 @@ class DeutscheWelle_sr(BasicNewsRecipe): (u'Nauka Tehnika Medicina', u'http://rss.dw-world.de/rdf/rss-ser-science'), (u'Kultura', u'feed:http://rss.dw-world.de/rdf/rss-ser-cul') ] - diff --git a/recipes/deutschland_funk.recipe b/recipes/deutschland_funk.recipe index aefe587276..d742b8fd41 100644 --- a/recipes/deutschland_funk.recipe +++ b/recipes/deutschland_funk.recipe @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -#from __future__ import unicode_literals, division, absolute_import, print_function +# from __future__ import unicode_literals, division, absolute_import, print_function from calibre.web.feeds.news import BasicNewsRecipe __license__ = 'GPL v3' @@ -14,6 +14,7 @@ Fetch Deutschlandfunk & Deutschlandfunk Kultur ## Version:1.6 ## New RSS source: https://www.deutschlandfunk.de/rss-angebot-102.html + class AdvancedUserRecipe1432200863(BasicNewsRecipe): title = 'Deutschlandfunk & Deutschlandfunk Kultur' @@ -28,14 +29,12 @@ class AdvancedUserRecipe1432200863(BasicNewsRecipe): max_articles_per_feed = 100 auto_cleanup = False - extra_css = ''' + extra_css = ''' h1, h2 {font-size: 1.6em; text-align: left} .article-header-description {font-size: 1em; font-style: italic; font-weight: normal;margin-bottom: 1em} .b-image-figure, .caption-figure.is-left, .b-image-credits {font-size: .75em; font-weight: normal;margin-bottom: .75em} ''' - - feeds = [ ('DLF Nachrichten', 'https://www.deutschlandfunk.de/nachrichten-100.rss'), ('DLF Politikportal', 'https://www.deutschlandfunk.de/politikportal-100.rss'), @@ -59,8 +58,8 @@ class AdvancedUserRecipe1432200863(BasicNewsRecipe): ('DLF-Kultur Film / Serie', 'https://www.deutschlandfunkkultur.de/film-serie-100.rss'), ] keep_only_tags = [ - dict(name='nav', attrs={'class':'b-breadcrumbs'}), # DLF articles - dict(name='article', attrs={'class':'b-article'}), # DLF articles + dict(name='nav', 
attrs={'class':'b-breadcrumbs'}), # DLF articles + dict(name='article', attrs={'class':'b-article'}), # DLF articles dict(name='div', attrs={'class':[ 'b-section-article-head-area', 'b-section-editor-content', @@ -77,6 +76,6 @@ class AdvancedUserRecipe1432200863(BasicNewsRecipe): dict(name='ul', attrs={'class':['b-social-icons']}), # DLF articles dict(name='ul', attrs={'class':['b-social-icons']}), # DLF Kultur articles - dict(name='div', attrs={'class':'b-footer-area-series'}), # DLF Kultur articles + dict(name='div', attrs={'class':'b-footer-area-series'}), # DLF Kultur articles dict(name='div', attrs={'id':'weekender'}) ] diff --git a/recipes/dev_ua.recipe b/recipes/dev_ua.recipe index ed778d908f..49e32138d4 100644 --- a/recipes/dev_ua.recipe +++ b/recipes/dev_ua.recipe @@ -17,14 +17,14 @@ class WiComix(BasicNewsRecipe): auto_cleanup = False oldest_article = 3 max_articles_per_feed = 30 - description = '\u041C\u0435\u0434\u0456\u0430 \u043F\u0440\u043E \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0456\u043D\u043D\u043E\u0432\u0430\u0446\u0456\u0457, \u0432\u0438\u043D\u0430\u0445\u043E\u0434\u0438 \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456 \u0442\u0430 \u0437\u0430 \u0457\u0457 \u043C\u0435\u0436\u0430\u043C\u0438. \u041F\u0440\u043E \u0442\u0435, \u044F\u043A \u0432\u043E\u043D\u0438 \u0432\u043F\u043B\u0438\u0432\u0430\u044E\u0442\u044C \u043D\u0430 \u0436\u0438\u0442\u0442\u044F \u043B\u044E\u0434\u0435\u0439.' # noqa + description = '\u041C\u0435\u0434\u0456\u0430 \u043F\u0440\u043E \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0456\u0457, \u0456\u043D\u043D\u043E\u0432\u0430\u0446\u0456\u0457, \u0432\u0438\u043D\u0430\u0445\u043E\u0434\u0438 \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456 \u0442\u0430 \u0437\u0430 \u0457\u0457 \u043C\u0435\u0436\u0430\u043C\u0438. \u041F\u0440\u043E \u0442\u0435, \u044F\u043A \u0432\u043E\u043D\u0438 \u0432\u043F\u043B\u0438\u0432\u0430\u044E\u0442\u044C \u043D\u0430 \u0436\u0438\u0442\u0442\u044F \u043B\u044E\u0434\u0435\u0439.' 
# noqa: E501 language = 'uk' remove_tags_before = dict(name='div', attrs={'class': 'article__header'}) remove_tags_after = dict(name='div', attrs={'class': 'article__body'}) - remove_tags = [ + remove_tags = [ dict(name='div', attrs={'class': 'article__reference article__reference_header'}), dict(name='div', attrs={'class': 'my-lg-5'}), dict(name='div', attrs={'class': 'video '}), diff --git a/recipes/diario_sport.recipe b/recipes/diario_sport.recipe index 312917eb5d..e02db71cba 100644 --- a/recipes/diario_sport.recipe +++ b/recipes/diario_sport.recipe @@ -20,7 +20,7 @@ class DiarioSport(BasicNewsRecipe): keep_only_tags = [dict(name='div', attrs={'id': ['noticiasMedio']})] remove_tags = [ - dict(name=['object', 'link', 'script', 'ul']), dict(name='div', attrs={'id': ['scrAdSense', 'herramientas2', 'participacion', 'participacion2', 'bloque1resultados', 'bloque2resultados', 'cont_vinyetesAnt', 'tinta', 'noticiasSuperior', 'cintillopublicidad2']}), dict( name='p', attrs={'class': ['masinformacion', 'hora']}), dict(name='a', attrs={'class': ["'link'"]}), dict(name='div', attrs={'class': ['addthis_toolbox addthis_default_style', 'firma', 'pretitularnoticia']}), dict(name='form', attrs={'id': ['formularioDeBusquedaAvanzada']}) # noqa + dict(name=['object', 'link', 'script', 'ul']), dict(name='div', attrs={'id': ['scrAdSense', 'herramientas2', 'participacion', 'participacion2', 'bloque1resultados', 'bloque2resultados', 'cont_vinyetesAnt', 'tinta', 'noticiasSuperior', 'cintillopublicidad2']}), dict(name='p', attrs={'class': ['masinformacion', 'hora']}), dict(name='a', attrs={'class': ["'link'"]}), dict(name='div', attrs={'class': ['addthis_toolbox addthis_default_style', 'firma', 'pretitularnoticia']}), dict(name='form', attrs={'id': ['formularioDeBusquedaAvanzada']}) # noqa: E501 ] def preprocess_html(self, soup): diff --git a/recipes/digizone.recipe b/recipes/digizone.recipe index 5726c41782..e486daeeec 100644 --- a/recipes/digizone.recipe +++ b/recipes/digizone.recipe @@ -19,9 +19,9 @@ class DigiZoneCZ(BasicNewsRecipe): publication_type = 'newsportal' no_stylesheets = True remove_javascript = True - extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \ - p.perex img {display:none;} \ - .urs p {margin: 0 0 0.8em 0;}' + extra_css = '''p.perex{font-size: 1.2em; margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} + p.perex img {display:none;} + .urs p {margin: 0 0 0.8em 0;}''' feeds = [ (u'Aktuality', u'http://rss.digizone.cz/aktuality'), diff --git a/recipes/dilema.recipe b/recipes/dilema.recipe index 1a64701880..468065ad6c 100644 --- a/recipes/dilema.recipe +++ b/recipes/dilema.recipe @@ -12,8 +12,8 @@ class Volkskrant(BasicNewsRecipe): country = 'RO' category = 'politics, culture, Romania' resolve_internal_links = True - remove_tags_before = { 'class': 'post' } - remove_tags_after = { 'class': 'post_content' } + remove_tags_before = {'class': 'post'} + remove_tags_after = {'class': 'post_content'} remove_tags = [ dict( attrs={ @@ -34,7 +34,7 @@ class Volkskrant(BasicNewsRecipe): dict(id=['like', 'dlik']), dict(name=['script', 'noscript', 'style']), ] - remove_attributes = ["class", "id", "name", "style"] + remove_attributes = ['class', 'id', 'name', 'style'] encoding = 'utf-8' no_stylesheets = True ignore_duplicate_articles = {'url'} @@ -88,7 +88,7 @@ class Volkskrant(BasicNewsRecipe): ) ) - sections = [("Numărul curent", articles)] + sections = [('Numărul curent', articles)] return sections def 
preprocess_html(self, soup): diff --git a/recipes/discover_magazine_monthly.recipe b/recipes/discover_magazine_monthly.recipe index 56db6633b9..2ba78e6633 100644 --- a/recipes/discover_magazine_monthly.recipe +++ b/recipes/discover_magazine_monthly.recipe @@ -81,11 +81,11 @@ class DiscoverMagazine(BasicNewsRecipe): preprocess_regexps = [(re.compile(r'', re.IGNORECASE), lambda m: ''), (re.compile(r'', re.IGNORECASE), lambda m: '')] - extra_css = 'body { font-family: helvetica, sans-serif; } \ - .belowdeck {font-style: italic; padding=bottom: 10px; max-width: none} \ - .caption {font-style: italic; padding=bottom: 10px; max-width: none} \ - .caption1 {font-style: italic; padding=bottom: 10px; max-width: none} \ - h2 { text-align: left; font-size: 1em; font-weight: bold; }}' + extra_css = '''body { font-family: helvetica, sans-serif; } + .belowdeck {font-style: italic; padding=bottom: 10px; max-width: none} + .caption {font-style: italic; padding=bottom: 10px; max-width: none} + .caption1 {font-style: italic; padding=bottom: 10px; max-width: none} + h2 { text-align: left; font-size: 1em; font-weight: bold; }}''' def parse_index(self): # gets current month from homepage and append to index @@ -104,8 +104,7 @@ class DiscoverMagazine(BasicNewsRecipe): if cover is not None: img = cover.find('img', src=True) if img is not None: - self.cover_url = 'http://www.discovermagazine.com' + \ - img['src'].replace(' ', '%20') # [:-7] + self.cover_url = 'http://www.discovermagazine.com' + img['src'].replace(' ', '%20') # [:-7] # parse articles for tag in col.findAll(name=['h3', 'div'], attrs={'class': ['bottomBorder', 'headline']}): if tag.name == 'h3': diff --git a/recipes/distrowatch_weekly.recipe b/recipes/distrowatch_weekly.recipe index 836b8a3d01..03aa38ed22 100644 --- a/recipes/distrowatch_weekly.recipe +++ b/recipes/distrowatch_weekly.recipe @@ -1,8 +1,8 @@ #!/usr/bin/env python -__license__ = "GPL v3" +__license__ = 'GPL v3' -"""DistroWatch Weekly""" +'''DistroWatch Weekly''' import datetime @@ -10,28 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe class DistroWatchWeekly(BasicNewsRecipe): - title = "DistroWatch Weekly" - description = "Weekly news about Linux distributions" - category = "Linux, Technology, News" + title = 'DistroWatch Weekly' + description = 'Weekly news about Linux distributions' + category = 'Linux, Technology, News' oldest_article = 14 - language = "en" + language = 'en' max_articles_per_feed = 50 no_stylesheets = True use_embedded_content = False - timefmt = " [%A, %d %B, %Y]" + timefmt = ' [%A, %d %B, %Y]' auto_cleanup = False keep_only_tags = [ dict( attrs={ - "class": - lambda x: x and ("News1" in x) + 'class': + lambda x: x and ('News1' in x) } ) ] def _get_mag_date(self): - """Return date of latest weekly issue.""" + '''Return date of latest weekly issue.''' d = datetime.date(2022, 6, 20) t = datetime.date.today() @@ -45,17 +45,17 @@ class DistroWatchWeekly(BasicNewsRecipe): # Get URL of latest mag page ld = self._get_mag_date() - url = ld.strftime("https://distrowatch.com/weekly.php?issue=%Y%m%d") + url = ld.strftime('https://distrowatch.com/weekly.php?issue=%Y%m%d') url = url.lower() - title = ld.strftime("DistroWatch Weekly for %Y-%m-%d") + title = ld.strftime('DistroWatch Weekly for %Y-%m-%d') # Get articles stories = [{ - "url": url, - "title": title, + 'url': url, + 'title': title, },] index = [ - ("Articles", stories), + ('Articles', stories), ] return index diff --git a/recipes/dnevnik_cro.recipe b/recipes/dnevnik_cro.recipe index e3bafb6c1c..ea9fe20023 
100644 --- a/recipes/dnevnik_cro.recipe +++ b/recipes/dnevnik_cro.recipe @@ -23,7 +23,7 @@ def new_tag(soup, name, attrs=()): class DnevnikCro(BasicNewsRecipe): title = 'Dnevnik - Hr' __author__ = 'Darko Miletic' - description = "Vijesti iz Hrvatske" + description = 'Vijesti iz Hrvatske' publisher = 'Dnevnik.hr' category = 'news, politics, Croatia' oldest_article = 2 @@ -36,13 +36,13 @@ class DnevnikCro(BasicNewsRecipe): lang = 'hr-HR' direction = 'ltr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' # noqa + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' # noqa: E501 conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True } - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] # noqa: RUF039 keep_only_tags = [dict(name='div', attrs={'id': 'article'})] @@ -59,7 +59,7 @@ class DnevnikCro(BasicNewsRecipe): soup.html['lang'] = self.lang soup.html['dir'] = self.direction - attribs = ['style', 'font', 'valign', 'colspan', 'width', 'height', 'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding', 'frames', 'rules', 'border' ] # noqa + attribs = ['style', 'font', 'valign', 'colspan', 'width', 'height', 'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding', 'frames', 'rules', 'border'] # noqa: E501 for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): item.name = 'div' for attrib in attribs: @@ -67,9 +67,9 @@ class DnevnikCro(BasicNewsRecipe): del item[attrib] mlang = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Language"), ("content", self.lang)]) + ('http-equiv', 'Content-Language'), ('content', self.lang)]) mcharset = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) + ('http-equiv', 'Content-Type'), ('content', 'text/html; charset=UTF-8')]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) return self.adeify_images(soup) diff --git a/recipes/dobreprogamy.recipe b/recipes/dobreprogamy.recipe index 55e906e17b..b9b7bfa220 100644 --- a/recipes/dobreprogamy.recipe +++ b/recipes/dobreprogamy.recipe @@ -15,7 +15,7 @@ class Dobreprogramy_pl(BasicNewsRecipe): index = 'http://www.dobreprogramy.pl/' no_stylesheets = True language = 'pl' - extra_css = '.title {font-size:22px;}' + extra_css = '.title {font-size:22px;} h1 { font-size:130% }' oldest_article = 8 max_articles_per_feed = 100 remove_attrs = ['style', 'width', 'height'] @@ -23,7 +23,7 @@ preprocess_regexps = [(re.compile( type(u'')(r'
Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...
')), lambda match: '')] keep_only_tags = [dict(name='h1'), dict( attrs={'class': ['entry single']}), dict(id='phContent_divArticle')] - remove_tags = [dict(attrs={'class': ['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master', 'social nested-grid grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix', 'series grid-margin-px30-top']}), dict(id='komentarze'), dict(id='phContent_ctl02_sBreadcrumb'), dict(name='iframe')] # noqa + remove_tags = [dict(attrs={'class': ['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master', 'social nested-grid grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix', 'series grid-margin-px30-top']}), dict(id='komentarze'), dict(id='phContent_ctl02_sBreadcrumb'), dict(name='iframe')] # noqa: E501 feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'), ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')] @@ -41,7 +41,3 @@ class Dobreprogramy_pl(BasicNewsRecipe): if not r.string: r.extract() return soup - - extra_css = ''' - h1 { font-size:130% } - ''' diff --git a/recipes/donga.recipe b/recipes/donga.recipe index 0c55c88300..ddefdf61e3 100644 --- a/recipes/donga.recipe +++ b/recipes/donga.recipe @@ -4,46 +4,46 @@ from calibre.web.feeds.recipes import BasicNewsRecipe # Comment out sections you are not interested in sections = [ - ("정치", "politics"), - ("사회", "national"), - ("경제", "economy"), - ("국제", "international"), - ("사설칼럼", "editorials"), - ("의학과학", "science"), - ("문화연예", "culture"), - ("스포츠", "sports"), - ("사람속으로", "inmul") + ('정치', 'politics'), + ('사회', 'national'), + ('경제', 'economy'), + ('국제', 'international'), + ('사설칼럼', 'editorials'), + ('의학과학', 'science'), + ('문화연예', 'culture'), + ('스포츠', 'sports'), + ('사람속으로', 'inmul'), # Following sections are marked as marked optional # as default. Uncomment to enable. 
- # , (u'건강', 'health') - # , (u'레저', 'leisure') - # , (u'도서', 'book') - # , (u'공연', 'show') - # , (u'여성', 'woman') - # , (u'여행', 'travel') - # , (u'생활정보', 'lifeinfo') + # (u'건강', 'health'), + # (u'레저', 'leisure'), + # (u'도서', 'book'), + # (u'공연', 'show'), + # (u'여성', 'woman'), + # (u'여행', 'travel'), + # (u'생활정보', 'lifeinfo'), ] class Donga(BasicNewsRecipe): - language = "ko" - title = "동아일보" - description = "동아일보 기사" - __author__ = "Minsik Cho" - ignore_duplicate_articles = {"title", "url"} + language = 'ko' + title = '동아일보' + description = '동아일보 기사' + __author__ = 'Minsik Cho' + ignore_duplicate_articles = {'title', 'url'} compress_news_images = True no_stylesheets = True oldest_article = 2 - encoding = "utf-8" + encoding = 'utf-8' # RSS Feed in syntax: # https://rss.donga.com/[sections].xml - feeds = [(title, "https://rss.donga.com/" + section + ".xml") for (title, section) in sections] + feeds = [(title, 'https://rss.donga.com/' + section + '.xml') for (title, section) in sections] # Remove logo and print buttons remove_tags = [ - dict(name="div", attrs={"class": "popHeaderWrap"}), - dict(name="div", attrs={"class": "etc"}), + dict(name='div', attrs={'class': 'popHeaderWrap'}), + dict(name='div', attrs={'class': 'etc'}), ] def print_version(self, url): @@ -51,8 +51,8 @@ class Donga(BasicNewsRecipe): # https://www.donga.com/news/[sections]/article/all/[date]/[gid]/1 # Return print version url with syntax: # https://www.donga.com/news/View?gid=[gid]&date=[date] - reobject = re.search("(?<=/all/)([0-9]*)/([0-9]*)", url) + reobject = re.search(r'(?<=/all/)([0-9]*)/([0-9]*)', url) date = reobject.group(1) gid = reobject.group(2) - return "https://www.donga.com/news/View?gid=" + gid + "&date=" + date + return 'https://www.donga.com/news/View?gid=' + gid + '&date=' + date diff --git a/recipes/dovod.recipe b/recipes/dovod.recipe index 9594e6122a..6a7a8cccdc 100644 --- a/recipes/dovod.recipe +++ b/recipes/dovod.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Dovod(BasicNewsRecipe): title = '\u0414\u043E\u0432\u043E\u0434' __author__ = 'bugmen00t' - description = '\u0421\u0430\u0439\u0442 \u00AB\u0414\u043E\u0432\u043E\u0434\u00BB \u043F\u043E\u043B\u0443\u0447\u0438\u043B \u0441\u0432\u043E\u0451 \u043D\u0430\u0437\u0432\u0430\u043D\u0438\u0435 \u0432 \u0447\u0435\u0441\u0442\u044C \u0440\u0430\u043D\u0435\u0435 \u0441\u043E\u0437\u0434\u0430\u043D\u043D\u043E\u0433\u043E \u043F\u0440\u043E\u0435\u043A\u0442\u0430 \u00AB\u0414\u043E\u0432\u043E\u0434 \u2014 \u0412\u043B\u0430\u0434\u0438\u043C\u0438\u0440\u0441\u043A\u0438\u0435 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u044B\u0435 \u0434\u0438\u0441\u043A\u0443\u0441\u0441\u0438\u0438\u00BB. \u041D\u0430\u0448\u0430 \u0446\u0435\u043B\u044C \u2014 \u043E\u0441\u0432\u0435\u0449\u0435\u043D\u0438\u0435 \u0430\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u044B\u0445 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u043E-\u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0445 \u0432\u043E\u043F\u0440\u043E\u0441\u043E\u0432 \u0438 \u0438\u0445 \u043E\u0442\u043A\u0440\u043E\u0432\u0435\u043D\u043D\u043E\u0435 \u0438 \u043E\u0441\u043C\u044B\u0441\u043B\u0435\u043D\u043D\u043E\u0435 \u043E\u0431\u0441\u0443\u0436\u0434\u0435\u043D\u0438\u0435.' 
# noqa + description = '\u0421\u0430\u0439\u0442 \u00AB\u0414\u043E\u0432\u043E\u0434\u00BB \u043F\u043E\u043B\u0443\u0447\u0438\u043B \u0441\u0432\u043E\u0451 \u043D\u0430\u0437\u0432\u0430\u043D\u0438\u0435 \u0432 \u0447\u0435\u0441\u0442\u044C \u0440\u0430\u043D\u0435\u0435 \u0441\u043E\u0437\u0434\u0430\u043D\u043D\u043E\u0433\u043E \u043F\u0440\u043E\u0435\u043A\u0442\u0430 \u00AB\u0414\u043E\u0432\u043E\u0434 \u2014 \u0412\u043B\u0430\u0434\u0438\u043C\u0438\u0440\u0441\u043A\u0438\u0435 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u044B\u0435 \u0434\u0438\u0441\u043A\u0443\u0441\u0441\u0438\u0438\u00BB. \u041D\u0430\u0448\u0430 \u0446\u0435\u043B\u044C \u2014 \u043E\u0441\u0432\u0435\u0449\u0435\u043D\u0438\u0435 \u0430\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u044B\u0445 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u043E-\u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0445 \u0432\u043E\u043F\u0440\u043E\u0441\u043E\u0432 \u0438 \u0438\u0445 \u043E\u0442\u043A\u0440\u043E\u0432\u0435\u043D\u043D\u043E\u0435 \u0438 \u043E\u0441\u043C\u044B\u0441\u043B\u0435\u043D\u043D\u043E\u0435 \u043E\u0431\u0441\u0443\u0436\u0434\u0435\u043D\u0438\u0435.' # noqa: E501 publisher = '\u0418\u043B\u044C\u044F \u041A\u043E\u0441\u044B\u0433\u0438\u043D, \u041A\u0438\u0440\u0438\u043B\u043B \u0418\u0448\u0443\u0442\u0438\u043D' category = 'blog' cover_url = u'https://pbs.twimg.com/profile_images/1498229545505284099/l9V1l59Z_400x400.jpg' diff --git a/recipes/dr_dk.recipe b/recipes/dr_dk.recipe index 4ab0a9726e..7f735ed313 100644 --- a/recipes/dr_dk.recipe +++ b/recipes/dr_dk.recipe @@ -14,8 +14,8 @@ class DRNyheder(BasicNewsRecipe): ('Udland', 'https://www.dr.dk/nyheder/service/feeds/udland'), ('Penge', 'https://www.dr.dk/nyheder/service/feeds/penge'), ('Politik', 'https://www.dr.dk/nyheder/service/feeds/politik'), - #('Sporten', 'https://www.dr.dk/nyheder/service/feeds/sporten'), - #('Seneste sport', 'https://www.dr.dk/nyheder/service/feeds/senestesport'), + # ('Sporten', 'https://www.dr.dk/nyheder/service/feeds/sporten'), + # ('Seneste sport', 'https://www.dr.dk/nyheder/service/feeds/senestesport'), ('Viden', 'https://www.dr.dk/nyheder/service/feeds/viden'), ('Kultur', 'https://www.dr.dk/nyheder/service/feeds/kultur'), ('Musik', 'https://www.dr.dk/nyheder/service/feeds/musik'), @@ -42,8 +42,8 @@ class DRNyheder(BasicNewsRecipe): publication_type = 'newspaper' encoding = 'utf8' language = 'da' - oldest_article = 4 # 2 might be best - max_articles_per_feed = 50 # 100 better, this is just for testing + oldest_article = 4 # 2 might be best + max_articles_per_feed = 50 # 100 better, this is just for testing no_stylesheets = True use_embedded_content = False auto_cleanup = False @@ -104,18 +104,17 @@ class DRNyheder(BasicNewsRecipe): cover_url = cover_item['src'] return cover_url - keep_only_tags = [ - dict(name="h1", attrs={'class': 'dre-article-title__heading'}), # Title - dict(name="div", attrs={'class': 'dre-article-byline'}), # Author - dict(name="figure", attrs={'class': 'dre-standard-article__figure'}), # Comment out to remove images - dict(name="p", attrs={'class': 'dre-article-body-paragraph'}), # All body text of the article - dict(name="article", attrs={'itemtype': 'http://schema.org/NewsArticle'}), - #dict(name="h1", attrs={'class': 'hydra-latest-news-page-short-news__title'}), - #dict(name="p", attrs={'class': 'hydra-latest-news-page-short-news__paragraph'}), - #dict(name="div", attrs={'class': 'dre-speech'}), - #dict(name="div", 
attrs={'itemprop': 'author'}) + dict(name='h1', attrs={'class': 'dre-article-title__heading'}), # Title + dict(name='div', attrs={'class': 'dre-article-byline'}), # Author + dict(name='figure', attrs={'class': 'dre-standard-article__figure'}), # Comment out to remove images + dict(name='p', attrs={'class': 'dre-article-body-paragraph'}), # All body text of the article + dict(name='article', attrs={'itemtype': 'http://schema.org/NewsArticle'}), + # dict(name="h1", attrs={'class': 'hydra-latest-news-page-short-news__title'}), + # dict(name="p", attrs={'class': 'hydra-latest-news-page-short-news__paragraph'}), + # dict(name="div", attrs={'class': 'dre-speech'}), + # dict(name="div", attrs={'itemprop': 'author'}) ] remove_tags = [ @@ -123,9 +122,9 @@ class DRNyheder(BasicNewsRecipe): dict(name='div', attrs={'class': [ 'hydra-latest-news-page-short-news__share', 'hydra-latest-news-page-short-news__a11y-container', 'hydra-latest-news-page-short-news__meta', 'hydra-latest-news-page-short-news__image-slider', 'dre-byline__dates']}), - dict(name="source"), - #dict(name='menu', attrs={'class': 'share'}), - #dict(name='menu', attrs={'class': 'dr-site-share-horizontal'}), + dict(name='source'), + # dict(name='menu', attrs={'class': 'share'}), + # dict(name='menu', attrs={'class': 'dr-site-share-horizontal'}), ] # Fixes images having the wrong aspect ratio diff --git a/recipes/drytooling_pl.recipe b/recipes/drytooling_pl.recipe index be1245cedc..d22d8c4447 100644 --- a/recipes/drytooling_pl.recipe +++ b/recipes/drytooling_pl.recipe @@ -7,7 +7,7 @@ class BasicUserRecipe1337668045(BasicNewsRecipe): title = u'Drytooling.com.pl' masthead_url = 'http://drytooling.com.pl/images/drytooling-kindle.png' cover_url = 'http://drytooling.com.pl/images/drytooling-kindle.png' - description = u'Drytooling.com.pl jest serwisem wspinaczki zimowej, alpinizmu i himalaizmu. Jeśli uwielbiasz zimę, nie możesz doczekać się aż wyciągniesz szpej z szafki i uderzysz w Tatry, Alpy, czy może Himalaje, to znajdziesz tutaj naprawdę dużo interesujących Cię treści! Zapraszamy!' # noqa + description = u'Drytooling.com.pl jest serwisem wspinaczki zimowej, alpinizmu i himalaizmu. Jeśli uwielbiasz zimę, nie możesz doczekać się aż wyciągniesz szpej z szafki i uderzysz w Tatry, Alpy, czy może Himalaje, to znajdziesz tutaj naprawdę dużo interesujących Cię treści! Zapraszamy!' # noqa: E501 __author__ = u'Damian Granowski' language = 'pl' oldest_article = 100 diff --git a/recipes/dwutygodnik.recipe b/recipes/dwutygodnik.recipe index 574acde94d..0c5f0bf9b5 100644 --- a/recipes/dwutygodnik.recipe +++ b/recipes/dwutygodnik.recipe @@ -33,7 +33,7 @@ class dwutygodnik(BasicNewsRecipe): browser.open('http://www.dwutygodnik.com/') # find the link - epublink = browser.find_link(text_regex=re.compile('Wydanie EPUB')) + epublink = browser.find_link(text_regex=re.compile(r'Wydanie EPUB')) # download ebook self.report_progress(0, _('Downloading ePUB')) diff --git a/recipes/dzieje_pl.recipe b/recipes/dzieje_pl.recipe index e4a0305158..82f8baff32 100644 --- a/recipes/dzieje_pl.recipe +++ b/recipes/dzieje_pl.recipe @@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Dzieje(BasicNewsRecipe): title = u'dzieje.pl' __author__ = 'fenuks' - description = 'Dzieje.pl - najlepszy portal informacyjno-edukacyjny dotyczący historii Polski XX wieku. Archiwalne fotografie, filmy, katalog postaci, quizy i konkursy.' # noqa + description = 'Dzieje.pl - najlepszy portal informacyjno-edukacyjny dotyczący historii Polski XX wieku. 
Archiwalne fotografie, filmy, katalog postaci, quizy i konkursy.' # noqa: E501 cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png' category = 'history' language = 'pl' @@ -31,7 +31,7 @@ class Dzieje(BasicNewsRecipe): soup2 = self.index_to_soup(url) pagetext = soup2.find( id='content-area').find(attrs={'class': 'content'}) - for r in pagetext.findAll(attrs={'class': ['fieldgroup group-groupkul', 'fieldgroup group-zdjeciekult', 'fieldgroup group-zdjecieciekaw', 'fieldgroup group-zdjecieksiazka', 'fieldgroup group-zdjeciedu', 'field field-type-filefield field-field-zdjecieglownawyd']}): # noqa + for r in pagetext.findAll(attrs={'class': ['fieldgroup group-groupkul', 'fieldgroup group-zdjeciekult', 'fieldgroup group-zdjecieciekaw', 'fieldgroup group-zdjecieksiazka', 'fieldgroup group-zdjeciedu', 'field field-type-filefield field-field-zdjecieglownawyd']}): # noqa: E501 r.extract() comments = pagetext.findAll( text=lambda text: isinstance(text, Comment)) @@ -63,20 +63,20 @@ class Dzieje(BasicNewsRecipe): def parse_index(self): feeds = [] - feeds.append((u"Wiadomości", self.find_articles( + feeds.append((u'Wiadomości', self.find_articles( 'http://dzieje.pl/wiadomosci'))) - feeds.append((u"Kultura i sztuka", self.find_articles( + feeds.append((u'Kultura i sztuka', self.find_articles( 'http://dzieje.pl/kulturaisztuka'))) - feeds.append((u"Film", self.find_articles('http://dzieje.pl/kino'))) - feeds.append((u"Rozmaitości historyczne", + feeds.append((u'Film', self.find_articles('http://dzieje.pl/kino'))) + feeds.append((u'Rozmaitości historyczne', self.find_articles('http://dzieje.pl/rozmaitości'))) feeds.append( - (u"Książka", self.find_articles('http://dzieje.pl/ksiazka'))) + (u'Książka', self.find_articles('http://dzieje.pl/ksiazka'))) feeds.append( - (u"Wystawa", self.find_articles('http://dzieje.pl/wystawa'))) - feeds.append((u"Edukacja", self.find_articles( + (u'Wystawa', self.find_articles('http://dzieje.pl/wystawa'))) + feeds.append((u'Edukacja', self.find_articles( 'http://dzieje.pl/edukacja'))) - feeds.append((u"Dzieje się", self.find_articles( + feeds.append((u'Dzieje się', self.find_articles( 'http://dzieje.pl/wydarzenia'))) return feeds diff --git a/recipes/dziennik_pl.recipe b/recipes/dziennik_pl.recipe index aa89e55823..58bceab229 100644 --- a/recipes/dziennik_pl.recipe +++ b/recipes/dziennik_pl.recipe @@ -21,10 +21,10 @@ class Dziennik_pl(BasicNewsRecipe): remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .foto {float: left;} .clr {clear: both;}' - preprocess_regexps = [(re.compile("Komentarze:"), lambda m: ''), (re.compile( - '
'<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')] + preprocess_regexps = [(re.compile(r'Komentarze:'), lambda m: ''), (re.compile( + r'<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>
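Context for the dziennik_pl hunk above: calibre runs each preprocess_regexps pair over the raw page HTML before parsing, so the lambdas simply delete the matched boilerplate. A minimal sketch of that behaviour, assuming the standard BasicNewsRecipe semantics (the helper name is illustrative, not calibre API):

import re

def apply_preprocess_regexps(raw_html, pairs):
    # Each (compiled_pattern, replacement) pair is applied in order to the
    # downloaded HTML; a lambda returning '' simply drops the match.
    for pattern, repl in pairs:
        raw_html = pattern.sub(repl, raw_html)
    return raw_html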
'), lambda m: '')] keep_only_tags = [dict(id='article')] - remove_tags = [dict(name='div', attrs={'class': ['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class': ['komentarz', 'article_icon_addcommnent']}), dict(name='ins'), dict(name='br')] # noqa + remove_tags = [dict(name='div', attrs={'class': ['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class': ['komentarz', 'article_icon_addcommnent']}), dict(name='ins'), dict(name='br')] # noqa: E501 feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'), (u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'), (u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'), diff --git a/recipes/dziennik_polski.recipe b/recipes/dziennik_polski.recipe index 51263ef9ab..08aa849e6d 100644 --- a/recipes/dziennik_polski.recipe +++ b/recipes/dziennik_polski.recipe @@ -35,7 +35,7 @@ class DziennikPolski24(BasicNewsRecipe): keep_only_tags = [ - dict(name='div', attrs={'class': ['toolbar']}), dict(name='h1'), dict(name='h2', attrs={'class': ['teaser']}), dict(name='div', attrs={'class': ['picture']}), dict(name='div', attrs={'id': ['showContent']}), dict(name='div', attrs={'class': ['paging']}), dict(name='div', attrs={'class': ['wykupTresc']}) # noqa + dict(name='div', attrs={'class': ['toolbar']}), dict(name='h1'), dict(name='h2', attrs={'class': ['teaser']}), dict(name='div', attrs={'class': ['picture']}), dict(name='div', attrs={'id': ['showContent']}), dict(name='div', attrs={'class': ['paging']}), dict(name='div', attrs={'class': ['wykupTresc']}) # noqa: E501 ] remove_tags = [ @@ -120,7 +120,7 @@ class DziennikPolski24(BasicNewsRecipe): if self.username is not None and self.password is not None: br.open('http://www.dziennikpolski24.pl/pl/moje-konto/950606-loguj.html') br.select_form(nr=1) - br["user_login[login]"] = self.username + br['user_login[login]'] = self.username br['user_login[pass]'] = self.password br.submit() return br diff --git a/recipes/dziennik_wschodni.recipe b/recipes/dziennik_wschodni.recipe index f091dd9551..13ec01e240 100644 --- a/recipes/dziennik_wschodni.recipe +++ b/recipes/dziennik_wschodni.recipe @@ -20,8 +20,8 @@ class DziennikWschodni(BasicNewsRecipe): no_stylesheets = True ignore_duplicate_articles = {'title', 'url'} - preprocess_regexps = [(re.compile(u'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(u'Przeczytaj także:.*?', re.DOTALL | re.IGNORECASE), lambda match: ''), # noqa - (re.compile(u'Przeczytaj również:.*?', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'Zobacz też:.*?', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa + preprocess_regexps = [(re.compile(u'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(u'Przeczytaj także:.*?', re.DOTALL | re.IGNORECASE), lambda match: ''), # noqa: E501, RUF039 + (re.compile(u'Przeczytaj również:.*?', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'Zobacz też:.*?', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa: E501, RUF039 keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', @@ -52,16 +52,14 @@ class DziennikWschodni(BasicNewsRecipe): self.INDEX + 
'/apps/pbcs.dll/section?Category=JEDYNKI') nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] soup = self.index_to_soup(nexturl) - self.cover_url = self.INDEX + \ - soup.find(id='cover').find(name='img')['src'] + self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] return getattr(self, 'cover_url', self.cover_url) def append_page(self, soup, appendtag): tag = soup.find('span', attrs={'class': 'photoNavigationPages'}) if tag: number = int(tag.string.rpartition('/')[-1].replace(' ', '')) - baseurl = self.INDEX + \ - soup.find(attrs={'class': 'photoNavigationNext'})['href'][:-1] + baseurl = self.INDEX + soup.find(attrs={'class': 'photoNavigationNext'})['href'][:-1] for r in appendtag.findAll(attrs={'class': 'photoNavigation'}): r.extract() diff --git a/recipes/echo_moskvy.recipe b/recipes/echo_moskvy.recipe index 303ca03858..1f2600f77d 100644 --- a/recipes/echo_moskvy.recipe +++ b/recipes/echo_moskvy.recipe @@ -22,7 +22,7 @@ class EchoMsk(BasicNewsRecipe): remove_tags_after = dict(name='article') - remove_tags = [ + remove_tags = [ dict(name='span', attrs={'class': 'sc-7b4cbb79-0 guzUFC'}), dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'}), dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'}) diff --git a/recipes/economist.recipe b/recipes/economist.recipe index b562e7e501..883a7e4c5d 100644 --- a/recipes/economist.recipe +++ b/recipes/economist.recipe @@ -63,7 +63,7 @@ def load_article_from_json(raw, root): body = root.xpath('//body')[0] article = E(body, 'article') E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;') - E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '') + E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '') E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;') try: date = data['dateModified'] @@ -97,8 +97,8 @@ def process_web_node(node): return f'
<p>{node.get("textHtml")}</p>' return f'<p>{node.get("text", "")}</p>
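For orientation, the two returns above belong to a dispatcher that maps Economist JSON content nodes to HTML snippets. A minimal sketch of that shape (only the <p> branch is taken from the hunk; the IMAGE markup here is an assumption, not the recipe's full logic):

def node_to_html(node):
    # Content nodes carry a 'type' plus 'text'/'textHtml' payloads.
    ntype = node.get('type', '')
    if ntype == 'PARAGRAPH':
        return f'<p>{node.get("textHtml") or node.get("text", "")}</p>'
    if ntype == 'IMAGE':
        return f'<div><img src="{node.get("url", "")}"></div>'
    return ''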
' elif ntype == 'IMAGE': - alt = "" if node.get("altText") is None else node.get("altText") - cap = "" + alt = '' if node.get('altText') is None else node.get('altText') + cap = '' if node.get('caption'): if node['caption'].get('textHtml') is not None: cap = node['caption']['textHtml'] @@ -123,7 +123,7 @@ def load_article_from_web_json(raw): data = json.loads(raw)['props']['pageProps']['cp2Content'] body += f'
<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>' body += f'<h1>{data["headline"]}</h1>' - if data.get("rubric") and data.get("rubric") is not None: + if data.get('rubric') and data.get('rubric') is not None: body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>
' try: date = data['dateModified'] @@ -186,7 +186,7 @@ class Economist(BasicNewsRecipe): encoding = 'utf-8' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' - __author__ = "Kovid Goyal" + __author__ = 'Kovid Goyal' description = ( 'Global news and current affairs from a European' ' perspective. Best downloaded on Friday mornings (GMT)' @@ -199,7 +199,7 @@ class Economist(BasicNewsRecipe): resolve_internal_links = True remove_tags = [ dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']), - dict(attrs={'aria-label': "Article Teaser"}), + dict(attrs={'aria-label': 'Article Teaser'}), dict(attrs={'id': 'player'}), dict(attrs={ 'class': [ @@ -252,7 +252,7 @@ class Economist(BasicNewsRecipe): def get_browser(self, *args, **kwargs): if self.from_archive: kwargs['user_agent'] = ( - 'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.103 Mobile Safari/537.36 Lamarr' # noqa + 'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.103 Mobile Safari/537.36 Lamarr' ) br = BasicNewsRecipe.get_browser(self, *args, **kwargs) else: @@ -266,11 +266,11 @@ class Economist(BasicNewsRecipe): if edition_date and isinstance(edition_date, str): return parse_only_date(edition_date, as_utc=False) try: - url = self.browser.open("https://www.economist.com/printedition").geturl() + url = self.browser.open('https://www.economist.com/printedition').geturl() except Exception as e: self.log('Failed to fetch publication date with error: ' + str(e)) return super().publication_date() - return parse_only_date(url.split("/")[-1], as_utc=False) + return parse_only_date(url.split('/')[-1], as_utc=False) def economist_test_article(self): return [('Articles', [{'title':'test', @@ -311,7 +311,7 @@ class Economist(BasicNewsRecipe): # return self.economist_test_article() # url = 'https://www.economist.com/weeklyedition/archive' query = { - 'query': 'query LatestWeeklyAutoEditionQuery($ref:String!){canonical(ref:$ref){hasPart(from:0 size:1 sort:"datePublished:desc"){parts{...WeeklyEditionFragment __typename}__typename}__typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical 
__typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa + 'query': 'query LatestWeeklyAutoEditionQuery($ref:String!){canonical(ref:$ref){hasPart(from:0 size:1 sort:"datePublished:desc"){parts{...WeeklyEditionFragment __typename}__typename}__typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa: E501 'operationName': 'LatestWeeklyAutoEditionQuery', 'variables': '{"ref":"/content/d06tg8j85rifiq3oo544c6b9j61dno2n"}', } @@ -319,7 +319,7 @@ class Economist(BasicNewsRecipe): content_id = self.get_content_id(edition_date) if content_id: query = { - 'query': 'query SpecificWeeklyEditionQuery($path:String!){section:canonical(ref:$path){...WeeklyEditionFragment __typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical 
__typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa + 'query': 'query SpecificWeeklyEditionQuery($path:String!){section:canonical(ref:$path){...WeeklyEditionFragment __typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa: E501 'operationName': 'SpecificWeeklyEditionQuery', 'variables': '{{"path":"{}"}}'.format(content_id), } @@ -339,9 +339,9 @@ class Economist(BasicNewsRecipe): def economist_parse_index(self, raw): # edition_date = self.recipe_specific_options.get('date') # if edition_date and isinstance(edition_date, str): - # data = json.loads(raw)['data']['section'] + # data = json.loads(raw)['data']['section'] # else: - # data = json.loads(raw)['data']['canonical']['hasPart']['parts'][0] + # data = json.loads(raw)['data']['canonical']['hasPart']['parts'][0] data = json.loads(raw)['data']['canonical']['hasPart']['parts'][0] dt = datetime.fromisoformat(data['datePublished'][:-1]) + timedelta(seconds=time.timezone) dt = dt.strftime('%b %d, %Y') @@ -364,25 +364,25 @@ class Economist(BasicNewsRecipe): self.log('Got cover:', self.cover_url, '\n', self.description) feeds_dict = defaultdict(list) - for part in safe_dict(data, "hasPart", "parts"): + for part in safe_dict(data, 'hasPart', 'parts'): try: section = part['articleSection']['internal'][0]['title'] except Exception: section = safe_dict(part, 'print', 'section', 'title') or 'section' if section not in feeds_dict: self.log(section) - title = safe_dict(part, "title") - desc = safe_dict(part, "rubric") or '' - sub = safe_dict(part, "flyTitle") or '' + title = safe_dict(part, 'title') + desc = safe_dict(part, 'rubric') or '' + sub = safe_dict(part, 'flyTitle') or '' if sub and section != sub: desc = sub + ' :: ' + desc pt = PersistentTemporaryFile('.html') pt.write(json.dumps(part).encode('utf-8')) pt.close() url = 'file:///' + pt.name - feeds_dict[section].append({"title": title, "url": url, "description": desc}) + feeds_dict[section].append({'title': title, 'url': url, 'description': desc}) self.log('\t', title, '\n\t\t', desc) - return [(section, articles) for 
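A pattern worth noting in the economist_parse_index hunk above: each article's JSON payload is stashed in a persistent temporary file and the feed entry points at it with a file:// URL, so the later download stage re-reads local JSON instead of refetching the site. A sketch of just that step, using the same calibre helper as the recipe:

import json

from calibre.ptempfile import PersistentTemporaryFile

def stash_article(part):
    # The temp file outlives this function; the recipe framework later
    # fetches it through the file:// URL like any other article page.
    pt = PersistentTemporaryFile('.html')
    pt.write(json.dumps(part).encode('utf-8'))
    pt.close()
    return 'file:///' + pt.name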
section, articles in feeds_dict.items()] + return list(feeds_dict.items()) def populate_article_metadata(self, article, soup, first): if not self.from_archive: @@ -409,9 +409,9 @@ class Economist(BasicNewsRecipe): load_article_from_json(raw, root) if '/interactive/' in url: - return '
<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' \ - + 'This article is supposed to be read in a browser' \ - + '</em></article></body></html>' + return ('<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' + 'This article is supposed to be read in a browser' + '</em></article></body></html>
') for div in root.xpath('//div[@class="lazy-image"]'): noscript = list(div.iter('noscript')) @@ -513,22 +513,22 @@ class Economist(BasicNewsRecipe): return self.economist_return_index(ans) def economist_parse_web_index(self, soup): - script_tag = soup.find("script", id="__NEXT_DATA__") + script_tag = soup.find('script', id='__NEXT_DATA__') if script_tag is not None: data = json.loads(script_tag.string) # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) - self.description = safe_dict(data, "props", "pageProps", "content", "headline") - self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "formattedIssueDate") + ']' - self.cover_url = safe_dict(data, "props", "pageProps", "content", "cover", "url").replace( + self.description = safe_dict(data, 'props', 'pageProps', 'content', 'headline') + self.timefmt = ' [' + safe_dict(data, 'props', 'pageProps', 'content', 'formattedIssueDate') + ']' + self.cover_url = safe_dict(data, 'props', 'pageProps', 'content', 'cover', 'url').replace( 'economist.com/', 'economist.com/cdn-cgi/image/width=960,quality=80,format=auto/').replace('SQ_', '') self.log('Got cover:', self.cover_url) feeds = [] for part in safe_dict( - data, "props", "pageProps", "content", "headerSections" - ) + safe_dict(data, "props", "pageProps", "content", "sections"): - section = safe_dict(part, "name") or '' + data, 'props', 'pageProps', 'content', 'headerSections' + ) + safe_dict(data, 'props', 'pageProps', 'content', 'sections'): + section = safe_dict(part, 'name') or '' if not section: continue self.log(section) @@ -536,12 +536,12 @@ class Economist(BasicNewsRecipe): articles = [] for ar in part['articles']: - title = safe_dict(ar, "headline") or '' - url = process_url(safe_dict(ar, "url") or '') + title = safe_dict(ar, 'headline') or '' + url = process_url(safe_dict(ar, 'url') or '') if not title or not url: continue - desc = safe_dict(ar, "rubric") or '' - sub = safe_dict(ar, "flyTitle") or '' + desc = safe_dict(ar, 'rubric') or '' + sub = safe_dict(ar, 'flyTitle') or '' if sub and section != sub: desc = sub + ' :: ' + desc self.log('\t', title, '\n\t', desc, '\n\t\t', url) @@ -555,12 +555,9 @@ class Economist(BasicNewsRecipe): # open('/t/raw.html', 'wb').write(raw.encode('utf-8')) root_ = parse(raw) if '/interactive/' in url: - return ( - '
<html><body><article><h1>' - + root_.xpath('//h1')[0].text + '</h1><em>' - + 'This article is supposed to be read in a browser' - + '</em></article></body></html>' - ) + return ('<html><body><article><h1>' + root_.xpath('//h1')[0].text + '</h1><em>' + 'This article is supposed to be read in a browser' + '</em></article></body></html>
') script = root_.xpath('//script[@id="__NEXT_DATA__"]') diff --git a/recipes/economist_espresso.recipe b/recipes/economist_espresso.recipe index f976771790..ed94653431 100644 --- a/recipes/economist_espresso.recipe +++ b/recipes/economist_espresso.recipe @@ -57,8 +57,8 @@ def load_article_from_json(raw, root): data = json.loads(raw) body = root.xpath('//body')[0] article = E(body, 'article') - E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;') - E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '') + E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;') + E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '') E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;') E(article, 'div', data['byline'], style='font-style: italic; color:#202020;') main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical') @@ -130,7 +130,7 @@ class Espresso(BasicNewsRecipe): remove_tags = [ dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']), - dict(attrs={'aria-label': "Article Teaser"}), + dict(attrs={'aria-label': 'Article Teaser'}), dict(attrs={ 'class': [ 'dblClkTrk', 'ec-article-info', 'share_inline_header', @@ -172,7 +172,7 @@ class Espresso(BasicNewsRecipe): def parse_index(self): query = { - 'query': 'query EspressoQuery($ref:String!){espresso:canonical(ref:$ref){...EspressoFragment __typename}}fragment EspressoFragment on Content{id type hasPart(size:1 sort:"datePublished:desc"){parts{id type rubric:description hasPart(sort:"publication.context.position:asc,datePublished:desc"){parts{...ArticleFragment __typename}__typename}__typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa + 'query': 'query EspressoQuery($ref:String!){espresso:canonical(ref:$ref){...EspressoFragment __typename}}fragment EspressoFragment on Content{id type hasPart(size:1 sort:"datePublished:desc"){parts{id type rubric:description hasPart(sort:"publication.context.position:asc,datePublished:desc"){parts{...ArticleFragment __typename}__typename}__typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id 
duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa: E501 'operationName': 'EspressoQuery', 'variables': '{"ref":"/content/jakj5ed3rml75i8j0d5i74p8adf6eem4"}', } @@ -189,13 +189,13 @@ class Espresso(BasicNewsRecipe): self.description = data['rubric'] ans = [] - for part in safe_dict(data, "hasPart", "parts"): - title = safe_dict(part, "title") + for part in safe_dict(data, 'hasPart', 'parts'): + title = safe_dict(part, 'title') pt = PersistentTemporaryFile('.html') pt.write(json.dumps(part).encode('utf-8')) pt.close() url = 'file:///' + pt.name - ans.append({"title": title, "url": url}) + ans.append({'title': title, 'url': url}) return [('Espresso', ans)] def preprocess_html(self, soup): @@ -242,7 +242,6 @@ class Espresso(BasicNewsRecipe): raw = etree.tostring(root, encoding='unicode') return raw - def eco_find_image_tables(self, soup): for x in soup.findAll('table', align=['right', 'center']): if len(x.findAll('font')) in (1, 2) and len(x.findAll('img')) == 1: diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe index b562e7e501..883a7e4c5d 100644 --- a/recipes/economist_free.recipe +++ b/recipes/economist_free.recipe @@ -63,7 +63,7 @@ def load_article_from_json(raw, root): body = root.xpath('//body')[0] article = E(body, 'article') E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;') - E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '') + E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '') E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;') try: date = data['dateModified'] @@ -97,8 +97,8 @@ def process_web_node(node): return f'
<p>{node.get("textHtml")}</p>' return f'<p>{node.get("text", "")}</p>
' elif ntype == 'IMAGE': - alt = "" if node.get("altText") is None else node.get("altText") - cap = "" + alt = '' if node.get('altText') is None else node.get('altText') + cap = '' if node.get('caption'): if node['caption'].get('textHtml') is not None: cap = node['caption']['textHtml'] @@ -123,7 +123,7 @@ def load_article_from_web_json(raw): data = json.loads(raw)['props']['pageProps']['cp2Content'] body += f'
<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>' body += f'<h1>{data["headline"]}</h1>' - if data.get("rubric") and data.get("rubric") is not None: + if data.get('rubric') and data.get('rubric') is not None: body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>
' try: date = data['dateModified'] @@ -186,7 +186,7 @@ class Economist(BasicNewsRecipe): encoding = 'utf-8' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' - __author__ = "Kovid Goyal" + __author__ = 'Kovid Goyal' description = ( 'Global news and current affairs from a European' ' perspective. Best downloaded on Friday mornings (GMT)' @@ -199,7 +199,7 @@ class Economist(BasicNewsRecipe): resolve_internal_links = True remove_tags = [ dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']), - dict(attrs={'aria-label': "Article Teaser"}), + dict(attrs={'aria-label': 'Article Teaser'}), dict(attrs={'id': 'player'}), dict(attrs={ 'class': [ @@ -252,7 +252,7 @@ class Economist(BasicNewsRecipe): def get_browser(self, *args, **kwargs): if self.from_archive: kwargs['user_agent'] = ( - 'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.103 Mobile Safari/537.36 Lamarr' # noqa + 'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.103 Mobile Safari/537.36 Lamarr' ) br = BasicNewsRecipe.get_browser(self, *args, **kwargs) else: @@ -266,11 +266,11 @@ class Economist(BasicNewsRecipe): if edition_date and isinstance(edition_date, str): return parse_only_date(edition_date, as_utc=False) try: - url = self.browser.open("https://www.economist.com/printedition").geturl() + url = self.browser.open('https://www.economist.com/printedition').geturl() except Exception as e: self.log('Failed to fetch publication date with error: ' + str(e)) return super().publication_date() - return parse_only_date(url.split("/")[-1], as_utc=False) + return parse_only_date(url.split('/')[-1], as_utc=False) def economist_test_article(self): return [('Articles', [{'title':'test', @@ -311,7 +311,7 @@ class Economist(BasicNewsRecipe): # return self.economist_test_article() # url = 'https://www.economist.com/weeklyedition/archive' query = { - 'query': 'query LatestWeeklyAutoEditionQuery($ref:String!){canonical(ref:$ref){hasPart(from:0 size:1 sort:"datePublished:desc"){parts{...WeeklyEditionFragment __typename}__typename}__typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical 
__typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa + 'query': 'query LatestWeeklyAutoEditionQuery($ref:String!){canonical(ref:$ref){hasPart(from:0 size:1 sort:"datePublished:desc"){parts{...WeeklyEditionFragment __typename}__typename}__typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa: E501 'operationName': 'LatestWeeklyAutoEditionQuery', 'variables': '{"ref":"/content/d06tg8j85rifiq3oo544c6b9j61dno2n"}', } @@ -319,7 +319,7 @@ class Economist(BasicNewsRecipe): content_id = self.get_content_id(edition_date) if content_id: query = { - 'query': 'query SpecificWeeklyEditionQuery($path:String!){section:canonical(ref:$path){...WeeklyEditionFragment __typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical 
__typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa + 'query': 'query SpecificWeeklyEditionQuery($path:String!){section:canonical(ref:$path){...WeeklyEditionFragment __typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa: E501 'operationName': 'SpecificWeeklyEditionQuery', 'variables': '{{"path":"{}"}}'.format(content_id), } @@ -339,9 +339,9 @@ class Economist(BasicNewsRecipe): def economist_parse_index(self, raw): # edition_date = self.recipe_specific_options.get('date') # if edition_date and isinstance(edition_date, str): - # data = json.loads(raw)['data']['section'] + # data = json.loads(raw)['data']['section'] # else: - # data = json.loads(raw)['data']['canonical']['hasPart']['parts'][0] + # data = json.loads(raw)['data']['canonical']['hasPart']['parts'][0] data = json.loads(raw)['data']['canonical']['hasPart']['parts'][0] dt = datetime.fromisoformat(data['datePublished'][:-1]) + timedelta(seconds=time.timezone) dt = dt.strftime('%b %d, %Y') @@ -364,25 +364,25 @@ class Economist(BasicNewsRecipe): self.log('Got cover:', self.cover_url, '\n', self.description) feeds_dict = defaultdict(list) - for part in safe_dict(data, "hasPart", "parts"): + for part in safe_dict(data, 'hasPart', 'parts'): try: section = part['articleSection']['internal'][0]['title'] except Exception: section = safe_dict(part, 'print', 'section', 'title') or 'section' if section not in feeds_dict: self.log(section) - title = safe_dict(part, "title") - desc = safe_dict(part, "rubric") or '' - sub = safe_dict(part, "flyTitle") or '' + title = safe_dict(part, 'title') + desc = safe_dict(part, 'rubric') or '' + sub = safe_dict(part, 'flyTitle') or '' if sub and section != sub: desc = sub + ' :: ' + desc pt = PersistentTemporaryFile('.html') pt.write(json.dumps(part).encode('utf-8')) pt.close() url = 'file:///' + pt.name - feeds_dict[section].append({"title": title, "url": url, "description": desc}) + feeds_dict[section].append({'title': title, 'url': url, 'description': desc}) self.log('\t', title, '\n\t\t', desc) - return [(section, articles) for 
section, articles in feeds_dict.items()] + return list(feeds_dict.items()) def populate_article_metadata(self, article, soup, first): if not self.from_archive: @@ -409,9 +409,9 @@ class Economist(BasicNewsRecipe): load_article_from_json(raw, root) if '/interactive/' in url: - return '
<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' \ - + 'This article is supposed to be read in a browser' \ - + '</em></article></body></html>' + return ('<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' + 'This article is supposed to be read in a browser' + '</em></article></body></html>
') for div in root.xpath('//div[@class="lazy-image"]'): noscript = list(div.iter('noscript')) @@ -513,22 +513,22 @@ class Economist(BasicNewsRecipe): return self.economist_return_index(ans) def economist_parse_web_index(self, soup): - script_tag = soup.find("script", id="__NEXT_DATA__") + script_tag = soup.find('script', id='__NEXT_DATA__') if script_tag is not None: data = json.loads(script_tag.string) # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) - self.description = safe_dict(data, "props", "pageProps", "content", "headline") - self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "formattedIssueDate") + ']' - self.cover_url = safe_dict(data, "props", "pageProps", "content", "cover", "url").replace( + self.description = safe_dict(data, 'props', 'pageProps', 'content', 'headline') + self.timefmt = ' [' + safe_dict(data, 'props', 'pageProps', 'content', 'formattedIssueDate') + ']' + self.cover_url = safe_dict(data, 'props', 'pageProps', 'content', 'cover', 'url').replace( 'economist.com/', 'economist.com/cdn-cgi/image/width=960,quality=80,format=auto/').replace('SQ_', '') self.log('Got cover:', self.cover_url) feeds = [] for part in safe_dict( - data, "props", "pageProps", "content", "headerSections" - ) + safe_dict(data, "props", "pageProps", "content", "sections"): - section = safe_dict(part, "name") or '' + data, 'props', 'pageProps', 'content', 'headerSections' + ) + safe_dict(data, 'props', 'pageProps', 'content', 'sections'): + section = safe_dict(part, 'name') or '' if not section: continue self.log(section) @@ -536,12 +536,12 @@ class Economist(BasicNewsRecipe): articles = [] for ar in part['articles']: - title = safe_dict(ar, "headline") or '' - url = process_url(safe_dict(ar, "url") or '') + title = safe_dict(ar, 'headline') or '' + url = process_url(safe_dict(ar, 'url') or '') if not title or not url: continue - desc = safe_dict(ar, "rubric") or '' - sub = safe_dict(ar, "flyTitle") or '' + desc = safe_dict(ar, 'rubric') or '' + sub = safe_dict(ar, 'flyTitle') or '' if sub and section != sub: desc = sub + ' :: ' + desc self.log('\t', title, '\n\t', desc, '\n\t\t', url) @@ -555,12 +555,9 @@ class Economist(BasicNewsRecipe): # open('/t/raw.html', 'wb').write(raw.encode('utf-8')) root_ = parse(raw) if '/interactive/' in url: - return ( - '
<html><body><article><h1>' - + root_.xpath('//h1')[0].text + '</h1><em>' - + 'This article is supposed to be read in a browser' - + '</em></article></body></html>' - ) + return ('<html><body><article><h1>' + root_.xpath('//h1')[0].text + '</h1><em>' + 'This article is supposed to be read in a browser' + '</em></article></body></html>
') script = root_.xpath('//script[@id="__NEXT_DATA__"]') diff --git a/recipes/economist_news.recipe b/recipes/economist_news.recipe index 2c3372d86f..e08f7ba91d 100644 --- a/recipes/economist_news.recipe +++ b/recipes/economist_news.recipe @@ -59,7 +59,7 @@ def load_article_from_json(raw, root): body = root.xpath('//body')[0] article = E(body, 'article') E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;') - E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '') + E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '') E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;') try: date = data['dateModified'] @@ -125,7 +125,7 @@ class EconomistNews(BasicNewsRecipe): encoding = 'utf-8' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' - __author__ = "Kovid Goyal" + __author__ = 'Kovid Goyal' description = ( 'Global news and current affairs from a European' ' perspective. Get the latest articles here.' @@ -140,7 +140,7 @@ class EconomistNews(BasicNewsRecipe): resolve_internal_links = True remove_tags = [ dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']), - dict(attrs={'aria-label': "Article Teaser"}), + dict(attrs={'aria-label': 'Article Teaser'}), dict(attrs={'id': 'player'}), dict(attrs={ 'class': [ @@ -209,7 +209,7 @@ class EconomistNews(BasicNewsRecipe): def parse_index(self): query = { - 'query': 'query HomeQuery($homeId:String!$relatedId:String!$podcastsId:String!){canonical(ref:$homeId){hasPart{parts{id title:headline cta{text url __typename}type hasPart{parts{isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}...ArticleFragment hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}__typename}relatedTopStories:canonical(ref:$relatedId){id title:headline hasPart(size:2 sort:"datePublished:desc"){parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}podcasts:canonical(ref:$podcastsId){id title:headline hasPart(size:6 sort:"datePublished:desc"){parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}}fragment ArticleFragment on Content{articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment 
__typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa + 'query': 'query HomeQuery($homeId:String!$relatedId:String!$podcastsId:String!){canonical(ref:$homeId){hasPart{parts{id title:headline cta{text url __typename}type hasPart{parts{isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}...ArticleFragment hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}hasPart{parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}__typename}__typename}relatedTopStories:canonical(ref:$relatedId){id title:headline hasPart(size:2 sort:"datePublished:desc"){parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}podcasts:canonical(ref:$podcastsId){id title:headline hasPart(size:6 sort:"datePublished:desc"){parts{...ArticleFragment isPartOf{id context{title:headline flyTitle:subheadline rubric:description dateline image{...ImageMainFragment ...ImagePromoFragment __typename}__typename}__typename}__typename}__typename}__typename}}fragment ArticleFragment on Content{articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa: E501 'operationName': 'HomeQuery', 'variables': '{"homeId":"/content/8mmm7h9v7arvfpvn4n20hakmg4ugatur",' 
'"relatedId":"/content/c7kho74htgua3gif74fa4bnbjr64i1js","podcastsId":"/content/omi23dr8h15h8c33t2gkb2cju8ap758o"}', @@ -234,9 +234,9 @@ class EconomistNews(BasicNewsRecipe): articles = [] for art in part['hasPart']['parts']: - title = safe_dict(art, "title") - desc = safe_dict(art, "rubric") or '' - sub = safe_dict(art, "flyTitle") or '' + title = safe_dict(art, 'title') + desc = safe_dict(art, 'rubric') or '' + sub = safe_dict(art, 'flyTitle') or '' if sub and section != sub: desc = sub + ' :: ' + desc if not art.get('text'): @@ -249,7 +249,7 @@ class EconomistNews(BasicNewsRecipe): pt.write(json.dumps(art).encode('utf-8')) pt.close() url = 'file:///' + pt.name - articles.append({"title": title, "url": url, "description": desc}) + articles.append({'title': title, 'url': url, 'description': desc}) self.log('\t', title, '\n\t\t', desc) if articles: feeds.append((section, articles)) @@ -276,9 +276,9 @@ class EconomistNews(BasicNewsRecipe): load_article_from_json(raw, root) if '/interactive/' in url: - return '
<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' \ - + 'This article is supposed to be read in a browser' \ - + '</em></article></body></html>' + return ('<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' + 'This article is supposed to be read in a browser' + '</em></article></body></html>
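The recurring change above rewrites the fallback for interactive pieces, which ship no parseable article body. A sketch of the stub it builds (markup as reconstructed above; the function name is illustrative):

def interactive_stub(title):
    # Interactive articles only yield a usable headline; the rest lives in
    # browser-side scripts, so the recipe emits a pointer page instead.
    return ('<html><body><article><h1>' + title + '</h1><em>'
            + 'This article is supposed to be read in a browser'
            + '</em></article></body></html>')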
') for div in root.xpath('//div[@class="lazy-image"]'): noscript = list(div.iter('noscript')) diff --git a/recipes/economist_search.recipe b/recipes/economist_search.recipe index a0c621e7d2..5ac61ad57f 100644 --- a/recipes/economist_search.recipe +++ b/recipes/economist_search.recipe @@ -23,8 +23,8 @@ def process_node(node): return f'
<p>{node.get("textHtml")}</p>' return f'<p>{node.get("text", "")}</p>
' elif ntype == 'IMAGE': - alt = "" if node.get("altText") is None else node.get("altText") - cap = "" + alt = '' if node.get('altText') is None else node.get('altText') + cap = '' if node.get('caption'): if node['caption'].get('textHtml') is not None: cap = node['caption']['textHtml'] @@ -78,6 +78,7 @@ def load_article_from_json(raw): body += process_node(node) return '
<html><body>' + body + '</body></html>
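These hunks lean on safe_dict() for every nested JSON lookup; its definition sits earlier in each recipe and is not part of this diff. Assuming the usual definition, it walks keys and returns an empty dict instead of raising, which is what makes the chained `or ''` defaults above work:

def safe_dict(data, *names):
    # A missing key at any depth yields {} (falsy), so chained lookups
    # never raise KeyError and callers can append `or ''` for defaults.
    ans = data
    for x in names:
        ans = ans.get(x) or {}
    return ans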
' + def cleanup_html_article(root): main = root.xpath('//main')[0] body = root.xpath('//body')[0] @@ -91,17 +92,20 @@ def cleanup_html_article(root): for x in root.xpath('//button'): x.getparent().remove(x) + def classes(classes): q = frozenset(classes.split(' ')) return dict(attrs={ 'class': lambda x: x and frozenset(x.split()).intersection(q)}) + def new_tag(soup, name, attrs=()): impl = getattr(soup, 'new_tag', None) if impl is not None: return impl(name, attrs=dict(attrs)) return Tag(soup, name, attrs=attrs or None) + def process_url(url): if url.startswith('/'): url = 'https://www.economist.com' + url @@ -112,12 +116,12 @@ class econ_search(BasicNewsRecipe): title = 'The Economist - Search' language = 'en' encoding = 'utf-8' - __author__ = "unkn0wn" + __author__ = 'unkn0wn' description = ( 'Use the Advanced section of the recipe to search.' ) - remove_attributes = ['style', 'height', 'width'] + remove_attributes = ['data-reactid', 'style', 'height', 'width'] no_stylesheets = True ignore_duplicate_articles = {'url'} extra_css = ''' @@ -128,7 +132,7 @@ class econ_search(BasicNewsRecipe): resolve_internal_links = True remove_tags = [ dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']), - dict(attrs={'aria-label': "Article Teaser"}), + dict(attrs={'aria-label': 'Article Teaser'}), dict(attrs={'id':'player'}), dict(attrs={ 'class': [ @@ -150,15 +154,13 @@ class econ_search(BasicNewsRecipe): ) ] keep_only_tags = [dict(name='article', id=lambda x: not x)] - no_stylesheets = True - remove_attributes = ['data-reactid', 'width', 'height'] # economist.com has started throttling after about 60% of the total has # downloaded with connection reset by peer (104) errors. delay = 3 def get_browser(self, *args, **kwargs): kwargs['user_agent'] = ( - 'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.103 Mobile Safari/537.36 Lamarr' # noqa + 'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.103 Mobile Safari/537.36 Lamarr' ) br = BasicNewsRecipe.get_browser(self, *args, **kwargs) return br @@ -167,12 +169,9 @@ class econ_search(BasicNewsRecipe): # open('/t/raw.html', 'wb').write(raw.encode('utf-8')) root_ = parse(raw) if '/interactive/' in url: - return ( - '
<html><body><article><h1>' - + root_.xpath('//h1')[0].text + '</h1><em>' - + 'This article is supposed to be read in a browser' - + '</em></article></body></html>' - ) + return ('<html><body><article><h1>' + root_.xpath('//h1')[0].text + '</h1><em>' + 'This article is supposed to be read in a browser' + '</em></article></body></html>
') script = root_.xpath('//script[@id="__NEXT_DATA__"]') diff --git a/recipes/economist_world_ahead.recipe b/recipes/economist_world_ahead.recipe index 3e9b177370..6d7e2336ee 100644 --- a/recipes/economist_world_ahead.recipe +++ b/recipes/economist_world_ahead.recipe @@ -23,8 +23,8 @@ def process_node(node): return f'
<p>{node.get("textHtml")}</p>' return f'<p>{node.get("text", "")}</p>
' elif ntype == 'IMAGE': - alt = "" if node.get("altText") is None else node.get("altText") - cap = "" + alt = '' if node.get('altText') is None else node.get('altText') + cap = '' if node.get('caption'): if node['caption'].get('textHtml') is not None: cap = node['caption']['textHtml'] @@ -122,7 +122,7 @@ class EconomistWorld(BasicNewsRecipe): encoding = 'utf-8' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' - __author__ = "unkn0wn" + __author__ = 'unkn0wn' description = ( 'The World Ahead is The Economist’s future-gazing publication. It prepares audiences for what is to ' 'come with mind-stretching insights and expert analysis—all in The Economist’s clear, elegant style.' @@ -136,7 +136,7 @@ class EconomistWorld(BasicNewsRecipe): resolve_internal_links = True remove_tags = [ dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']), - dict(attrs={'aria-label': "Article Teaser"}), + dict(attrs={'aria-label': 'Article Teaser'}), dict(attrs={'id': 'player'}), dict(attrs={ 'class': [ @@ -178,7 +178,7 @@ class EconomistWorld(BasicNewsRecipe): def get_browser(self, *args, **kwargs): kwargs['user_agent'] = ( - 'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.103 Mobile Safari/537.36 Lamarr' # noqa + 'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.103 Mobile Safari/537.36 Lamarr' ) br = BasicNewsRecipe.get_browser(self, *args, **kwargs) return br @@ -205,24 +205,24 @@ class EconomistWorld(BasicNewsRecipe): return self.economist_return_index(ans) def economist_parse_index(self, soup): - script_tag = soup.find("script", id="__NEXT_DATA__") + script_tag = soup.find('script', id='__NEXT_DATA__') if script_tag is not None: data = json.loads(script_tag.string) # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) - self.title = safe_dict(data, "props", "pageProps", "content", "headline") + self.title = safe_dict(data, 'props', 'pageProps', 'content', 'headline') self.cover_url = 'https://mma.prnewswire.com/media/2561745/The_Economist_World_Ahead_2025_cover.jpg?w=600' feeds = [] - for coll in safe_dict(data, "props", "pageProps", "content", "components"): - section = safe_dict(coll, "headline") or '' + for coll in safe_dict(data, 'props', 'pageProps', 'content', 'components'): + section = safe_dict(coll, 'headline') or '' self.log(section) articles = [] - for part in safe_dict(coll, "items"): - title = safe_dict(part, "headline") or '' - url = process_url(safe_dict(part, "url") or '') - desc = safe_dict(part, "rubric") or '' - sub = safe_dict(part, "flyTitle") or '' + for part in safe_dict(coll, 'items'): + title = safe_dict(part, 'headline') or '' + url = process_url(safe_dict(part, 'url') or '') + desc = safe_dict(part, 'rubric') or '' + sub = safe_dict(part, 'flyTitle') or '' if sub and section != sub: desc = sub + ' :: ' + desc self.log('\t', title, '\n\t', desc, '\n\t\t', url) @@ -245,12 +245,9 @@ class EconomistWorld(BasicNewsRecipe): # open('/t/raw.html', 'wb').write(raw.encode('utf-8')) root_ = parse(raw) if '/interactive/' in url: - return ( - '
<html><body><article><h1>' - + root_.xpath('//h1')[0].text + '</h1>' - + 'This article is supposed to be read in a browser' - + '</article></body></html>' - ) + return ('<html><body><article><h1>' + root_.xpath('//h1')[0].text + '</h1>' + 'This article is supposed to be read in a browser.' + '</article></body></html>
') script = root_.xpath('//script[@id="__NEXT_DATA__"]') diff --git a/recipes/edmonton_journal.recipe b/recipes/edmonton_journal.recipe index 5395ba7bda..f6a054a5e0 100644 --- a/recipes/edmonton_journal.recipe +++ b/recipes/edmonton_journal.recipe @@ -60,28 +60,28 @@ class CanWestPaper(BasicNewsRecipe): ] # un-comment the following six lines for the Vancouver Province - # title = u'Vancouver Province' - # url_prefix = 'http://www.theprovince.com' - # description = u'News from Vancouver, BC' - # std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg' - # logo_url = 'vplogo.jpg' - # fp_tag = 'CAN_TP' + # # title = u'Vancouver Province' + # # url_prefix = 'http://www.theprovince.com' + # # description = u'News from Vancouver, BC' + # # std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg' + # # logo_url = 'vplogo.jpg' + # # fp_tag = 'CAN_TP' # un-comment the following six lines for the Vancouver Sun - # title = u'Vancouver Sun' - # url_prefix = 'http://www.vancouversun.com' - # description = u'News from Vancouver, BC' - # std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg' - # logo_url = 'vslogo.jpg' - # fp_tag = 'CAN_VS' + # # title = u'Vancouver Sun' + # # url_prefix = 'http://www.vancouversun.com' + # # description = u'News from Vancouver, BC' + # # std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg' + # # logo_url = 'vslogo.jpg' + # # fp_tag = 'CAN_VS' # un-comment the following six lines for the Calgary Herald - # title = u'Calgary Herald' - # url_prefix = 'http://www.calgaryherald.com' - # description = u'News from Calgary, AB' - # std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg' - # logo_url = 'chlogo.jpg' - # fp_tag = 'CAN_CH' + # # title = u'Calgary Herald' + # # url_prefix = 'http://www.calgaryherald.com' + # # description = u'News from Calgary, AB' + # # std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg' + # # logo_url = 'chlogo.jpg' + # # fp_tag = 'CAN_CH' # un-comment the following six lines for the Edmonton Journal title = u'Edmonton Journal' @@ -92,20 +92,20 @@ class CanWestPaper(BasicNewsRecipe): fp_tag = 'CAN_EJ' # un-comment the following six lines for the Ottawa Citizen -# title = u'Ottawa Citizen' -# url_prefix = 'http://www.ottawacitizen.com' -# description = u'News from Ottawa, ON' -# std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg' -# logo_url = 'oclogo.jpg' -# fp_tag = 'CAN_OC' + # # title = u'Ottawa Citizen' + # # url_prefix = 'http://www.ottawacitizen.com' + # # description = u'News from Ottawa, ON' + # # std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg' + # # logo_url = 'oclogo.jpg' + # # fp_tag = 'CAN_OC' # un-comment the following six lines for the Montreal Gazette -# title = u'Montreal Gazette' -# url_prefix = 'http://www.montrealgazette.com' -# description = u'News from Montreal, QC' -# std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg' -# logo_url = 'mglogo.jpg' -# fp_tag = 'CAN_MG' + # # title = u'Montreal Gazette' + # # url_prefix = 'http://www.montrealgazette.com' + # # description = u'News from Montreal, QC' + # # std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg' + # # logo_url = 'mglogo.jpg' + # # fp_tag = 'CAN_MG' Kindle_Fire = False masthead_url = std_logo_url @@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe): .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } #photocredit { 
font-size: xx-small; font-weight: normal; }''' - keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})] + keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})] remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'}, dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict( @@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe): name='div', attrs={'class': 'copyright'}), dict(name='div', attrs={'class': 'rule_grey_solid'}), dict(name='div', attrs={'id': 'soundoff'}), - dict(name='div', attrs={'id': re.compile('flyer')}), + dict(name='div', attrs={'id': re.compile(r'flyer')}), dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})] def get_cover_url(self): @@ -154,8 +154,7 @@ class CanWestPaper(BasicNewsRecipe): except: while daysback < 7: cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg' + \ - str((date.today() - timedelta(days=daysback)).day) + \ - '/lg/' + self.fp_tag + '.jpg' + str((date.today() - timedelta(days=daysback)).day) + '/lg/' + self.fp_tag + '.jpg' br = BasicNewsRecipe.get_browser(self) try: br.open(cover) @@ -164,24 +163,24 @@ class CanWestPaper(BasicNewsRecipe): continue break if daysback == 7: - self.log("\nCover unavailable") + self.log('\nCover unavailable') cover = None return cover def fixChars(self, string): # Replace lsquo (\x91) - fixed = re.sub("\x91", "‘", string) + fixed = string.replace('\x91', '‘') # Replace rsquo (\x92) - fixed = re.sub("\x92", "’", fixed) + fixed = fixed.replace('\x92', '’') # Replace ldquo (\x93) - fixed = re.sub("\x93", "“", fixed) + fixed = fixed.replace('\x93', '“') # Replace rdquo (\x94) - fixed = re.sub("\x94", "”", fixed) + fixed = fixed.replace('\x94', '”') # Replace ndash (\x96) - fixed = re.sub("\x96", "–", fixed) + fixed = fixed.replace('\x96', '–') # Replace mdash (\x97) - fixed = re.sub("\x97", "—", fixed) - fixed = re.sub("’", "’", fixed) + fixed = fixed.replace('\x97', '—') + fixed = fixed.replace('’', '’') return fixed def massageNCXText(self, description): @@ -214,7 +213,7 @@ class CanWestPaper(BasicNewsRecipe): divtags = soup.findAll('div', attrs={'id': ''}) if divtags: for div in divtags: - del(div['id']) + del div['id'] pgall = soup.find('div', attrs={'id': 'storyphoto'}) if pgall is not None: # photo gallery perhaps @@ -262,10 +261,10 @@ class CanWestPaper(BasicNewsRecipe): if url.startswith('/'): url = self.url_prefix + url if not url.startswith(self.url_prefix): - print("Rejected " + url) + print('Rejected ' + url) return if url in self.url_list: - print("Rejected dup " + url) + print('Rejected dup ' + url) return self.url_list.append(url) title = self.tag_to_string(atag, False) @@ -277,8 +276,8 @@ class CanWestPaper(BasicNewsRecipe): return dtag = adiv.find('div', 'content') description = '' - print("URL " + url) - print("TITLE " + title) + print('URL ' + url) + print('TITLE ' + title) if dtag is not None: stag = dtag.span if stag is not None: @@ -286,18 +285,18 @@ class CanWestPaper(BasicNewsRecipe): description = self.tag_to_string(stag, False) else: description = self.tag_to_string(dtag, False) - print("DESCRIPTION: " + description) + print('DESCRIPTION: ' + description) if key not in articles: articles[key] = [] articles[key].append(dict( title=title, url=url, date='', description=description, author='', content='')) def parse_web_index(key, keyurl): - print("Section: " + key + ': ' + self.url_prefix + keyurl) + 
print('Section: ' + key + ': ' + self.url_prefix + keyurl) try: soup = self.index_to_soup(self.url_prefix + keyurl) except: - print("Section: " + key + ' NOT FOUND') + print('Section: ' + key + ' NOT FOUND') return ans.append(key) mainsoup = soup.find('div', 'bodywrapper') @@ -309,7 +308,7 @@ class CanWestPaper(BasicNewsRecipe): for wdiv in mainsoup.findAll(attrs={'class': ['headline', 'featurecontent']}): handle_article(wdiv, key) - for (k, url) in self.postmedia_index_pages: + for k,url in self.postmedia_index_pages: parse_web_index(k, url) ans = [(key, articles[key]) for key in ans if key in articles] return ans diff --git a/recipes/el_colombiano.recipe b/recipes/el_colombiano.recipe index f557d85bbd..3a39cb0fbb 100644 --- a/recipes/el_colombiano.recipe +++ b/recipes/el_colombiano.recipe @@ -9,8 +9,10 @@ class AdvancedUserRecipe1311790237(BasicNewsRecipe): language = 'es_CO' cover_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif' remove_tags_before = dict(id='contenidoArt') - remove_tags_after = dict(id='enviaTips') - remove_tags_after = dict(id='zonaPata') + remove_tags_after = [ + dict(id='enviaTips'), + dict(id='zonaPata'), + ] oldest_article = 1 max_articles_per_feed = 100 remove_javascript = True @@ -20,12 +22,12 @@ class AdvancedUserRecipe1311790237(BasicNewsRecipe): masthead_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif' publication_type = 'newspaper' - extra_css = """ + extra_css = ''' p{text-align: justify; font-size: 100%} body{ text-align: left; font-size:100% } h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; } - """ + ''' feeds = [(u'Portada', u'http://www.elcolombiano.com/rss/portada.xml'), (u'Antioquia', u'http://www.elcolombiano.com/rss/Antioquia.xml'), diff --git a/recipes/el_correo.recipe b/recipes/el_correo.recipe index 5f42a7aa2e..675dac2e20 100644 --- a/recipes/el_correo.recipe +++ b/recipes/el_correo.recipe @@ -18,10 +18,9 @@ class elcorreo(BasicNewsRecipe): ignore_duplicate_articles = {'url'} encoding = 'utf-8' masthead_url = 'https://s1.ppllstatics.com/starfish/1.3.76/assets/images/logos/logo-elcorreo.svg' - encoding = 'utf-8' remove_empty_feeds = True resolve_internal_links = True - max_articles_per_feed = 25 # articles + max_articles_per_feed = 25 # articles compress_news_images = True recipe_specific_options = { @@ -59,7 +58,7 @@ class elcorreo(BasicNewsRecipe): ] remove_tags = [ - dict(name = ['svg', 'section', 'nav']), + dict(name=['svg', 'section', 'nav']), dict(attrs={'data-voc-component':['dropdown', 'modal', 'slider-grab']}), classes( 'v-mdl-ath__img-c v-adv v-i-b v-mdl-ath__c--2 v-d-cmp-adv v-d-cmp-nws ' @@ -88,14 +87,6 @@ class elcorreo(BasicNewsRecipe): p.name = 'div' return soup - recipe_specific_options = { - 'days': { - 'short': 'Oldest article to download from this news source. 
In days ', - 'long': 'For example, 0.5, gives you articles from the past 12 hours', - 'default': str(oldest_article) - } - } - def get_browser(self, *args, **kwargs): kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) diff --git a/recipes/el_cultural.recipe b/recipes/el_cultural.recipe index c502fe4e31..737aace9fc 100644 --- a/recipes/el_cultural.recipe +++ b/recipes/el_cultural.recipe @@ -14,7 +14,7 @@ class RevistaElCultural(BasicNewsRecipe): no_stylesheets = True remove_javascript = True - extra_css = 'h1{ font-family: sans-serif; font-size: large; font-weight: bolder; text-align: justify } h2{ font-family: sans-serif; font-size: small; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size: small; font-weight: 500; text-align: justify } h4{ font-family: sans-serif; font-weight: lighter; font-size: medium; font-style: italic; text-align: justify } .rtsArticuloFirma{ font-family: sans-serif; font-size: small; text-align: justify } .column span-13 last{ font-family: sans-serif; font-size: medium; text-align: justify } .rtsImgArticulo{font-family: serif; font-size: small; color: #000000; text-align: justify}' # noqa + extra_css = 'h1{ font-family: sans-serif; font-size: large; font-weight: bolder; text-align: justify } h2{ font-family: sans-serif; font-size: small; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size: small; font-weight: 500; text-align: justify } h4{ font-family: sans-serif; font-weight: lighter; font-size: medium; font-style: italic; text-align: justify } .rtsArticuloFirma{ font-family: sans-serif; font-size: small; text-align: justify } .column span-13 last{ font-family: sans-serif; font-size: medium; text-align: justify } .rtsImgArticulo{font-family: serif; font-size: small; color: #000000; text-align: justify}' # noqa: E501 def preprocess_html(self, soup): for item in soup.findAll(style=True): @@ -55,9 +55,9 @@ class RevistaElCultural(BasicNewsRecipe): if url.startswith('/version_papel/' + titleSection + '/'): url = 'http://www.elcultural.es' + url - self.log('\t\tFound article:', title[0:title.find("|") - 1]) + self.log('\t\tFound article:', title[0:title.find('|') - 1]) self.log('\t\t\t', url) - current_articles.append({'title': title[0:title.find("|") - 1], 'url': url, + current_articles.append({'title': title[0:title.find('|') - 1], 'url': url, 'description': '', 'date': ''}) return current_articles diff --git a/recipes/el_diplo.recipe b/recipes/el_diplo.recipe index c9c44e26f0..2374a639cb 100644 --- a/recipes/el_diplo.recipe +++ b/recipes/el_diplo.recipe @@ -1,51 +1,51 @@ # -*- mode: python; coding: utf-8; -*- # vim: set syntax=python fileencoding=utf-8 -__license__ = "GPL v3" -__copyright__ = "2023, Tomás Di Domenico " +__license__ = 'GPL v3' +__copyright__ = '2023, Tomás Di Domenico ' -""" +''' www.eldiplo.org -""" +''' from calibre.web.feeds.news import BasicNewsRecipe class ElDiplo2023(BasicNewsRecipe): - title = "Le Monde Diplomatique - cono sur" - __author__ = "Tomás Di Domenico" - description = "Publicación de Le Monde Diplomatique para el cono sur." - publisher = "Capital Intelectual" - category = "News, Politics, Argentina, Uruguay, Paraguay, South America, World" + title = 'Le Monde Diplomatique - cono sur' + __author__ = 'Tomás Di Domenico' + description = 'Publicación de Le Monde Diplomatique para el cono sur.' 
+ publisher = 'Capital Intelectual' + category = 'News, Politics, Argentina, Uruguay, Paraguay, South America, World' oldest_article = 31 no_stylesheets = True - encoding = "utf8" + encoding = 'utf8' use_embedded_content = False - language = "es_AR" + language = 'es_AR' remove_empty_feeds = True - publication_type = "magazine" + publication_type = 'magazine' delay = 1 simultaneous_downloads = 1 timeout = 8 needs_subscription = True - ignore_duplicate_articles = {"url"} + ignore_duplicate_articles = {'url'} temp_files = [] fetch_retries = 10 handle_gzip = True compress_news_images = True scale_news_images_to_device = True masthead_url = ( - "https://www.eldiplo.org/wp-content/themes/_polenta_/assets/diplo.png" + 'https://www.eldiplo.org/wp-content/themes/_polenta_/assets/diplo.png' ) - INDEX = "https://www.eldiplo.org/" + INDEX = 'https://www.eldiplo.org/' - conversion_options = {"series": "El Dipló", "publisher": publisher, "base_font_size": 8, "tags": category} + conversion_options = {'series': 'El Dipló', 'publisher': publisher, 'base_font_size': 8, 'tags': category} - keep_only_tags = [dict(name=["article"])] + keep_only_tags = [dict(name=['article'])] - remove_tags = [dict(name=["button"])] + remove_tags = [dict(name=['button'])] - extra_css = """ + extra_css = ''' .entry-title { text-align: center; } @@ -67,59 +67,59 @@ class ElDiplo2023(BasicNewsRecipe): padding-left: 10%; padding-right: 10%; } - """ + ''' def get_browser(self): br = BasicNewsRecipe.get_browser(self) br.open(self.INDEX) if self.username is not None and self.password is not None: - br.select_form(id="loginform") - br["log"] = self.username - br["pwd"] = self.password + br.select_form(id='loginform') + br['log'] = self.username + br['pwd'] = self.password br.submit() return br def get_cover_url(self): soup_index = self.index_to_soup(self.INDEX) - tag_sumario = soup_index.find("span", text="Sumario") - url_sumario = "https://www.eldiplo.org" + tag_sumario.parent["href"] + tag_sumario = soup_index.find('span', text='Sumario') + url_sumario = 'https://www.eldiplo.org' + tag_sumario.parent['href'] soup = self.index_to_soup(url_sumario) - container = soup.find("div", class_="px-16") - url = container.find("img")["src"] + container = soup.find('div', class_='px-16') + url = container.find('img')['src'] - return getattr(self, "cover_url", url) + return getattr(self, 'cover_url', url) def _process_article(self, article): - url = article.find("a", href=True, attrs={"class": "title"})["href"] - title = self.tag_to_string(article).replace("Editorial", "Editorial: ") + url = article.find('a', href=True, attrs={'class': 'title'})['href'] + title = self.tag_to_string(article).replace('Editorial', 'Editorial: ') try: - title, authors = title.split(", por") - authors = f"por {authors}" + title, authors = title.split(', por') + authors = f'por {authors}' except ValueError: - authors = "" - self.log("title: ", title, " url: ", url) - return {"title": title, "url": url, "description": authors, "date": ""} + authors = '' + self.log('title: ', title, ' url: ', url) + return {'title': title, 'url': url, 'description': authors, 'date': ''} def preprocess_html(self, soup): - font_size = "90%" + font_size = '90%' # make the footnotes smaller - for p in soup.find("div", id="nota_pie").findChildren("p", recursive=False): - p["style"] = f"font-size: {font_size};" + for p in soup.find('div', id='nota_pie').findChildren('p', recursive=False): + p['style'] = f'font-size: {font_size};' return soup def parse_index(self): soup_index = 
self.index_to_soup(self.INDEX) - tag_sumario = soup_index.find("span", text="Sumario") + tag_sumario = soup_index.find('span', text='Sumario') if tag_sumario is None: return None - url_sumario = "https://www.eldiplo.org" + tag_sumario.parent["href"] + url_sumario = 'https://www.eldiplo.org' + tag_sumario.parent['href'] self.log(url_sumario) soup_sumario = self.index_to_soup(url_sumario) @@ -128,20 +128,20 @@ class ElDiplo2023(BasicNewsRecipe): articles = [] dossiers = [] - sumario = soup_sumario.find("div", class_="sumario") + sumario = soup_sumario.find('div', class_='sumario') - for section in sumario.find_all("div", recursive=False): - classes = section.attrs["class"] + for section in sumario.find_all('div', recursive=False): + classes = section.attrs['class'] - if "dossier" in classes: - dtitle = self.tag_to_string(section.find("h3")) + if 'dossier' in classes: + dtitle = self.tag_to_string(section.find('h3')) darticles = [] - for article in section.find_all("div", recursive=False): + for article in section.find_all('div', recursive=False): darticles.append(self._process_article(article)) dossiers.append((dtitle, darticles)) else: articles.append(self._process_article(section)) - feeds.append(("Artículos", articles)) + feeds.append(('Artículos', articles)) feeds += dossiers return feeds diff --git a/recipes/el_pais.recipe b/recipes/el_pais.recipe index ffe83b57dc..d1a929c506 100644 --- a/recipes/el_pais.recipe +++ b/recipes/el_pais.recipe @@ -114,16 +114,16 @@ div.a_md_a {text-align: center; text-transform: uppercase; font-size: .8rem;} from datetime import date cover = ('https://srv00.epimg.net/pdf/elpais/snapshot/' + str(date.today().year) + '/' + date.today().strftime('%m') + '/elpais/' + - str(date.today().year) + date.today().strftime('%m') + date.today().strftime('%d') + 'Big.jpg') + str(date.today().year) + date.today().strftime('%m') + date.today().strftime('%d') + 'Big.jpg') br = BasicNewsRecipe.get_browser(self) try: br.open(cover) except: - self.log("\nCover unavailable") + self.log('\nCover unavailable') cover = None return cover def image_url_processor(cls, baseurl, url): - splitUrl = url.split("cloudfront-") + splitUrl = url.split('cloudfront-') parsedUrl = 'https://cloudfront-' + splitUrl[1] return parsedUrl diff --git a/recipes/el_pais_babelia.recipe b/recipes/el_pais_babelia.recipe index 88049a91b1..3fff2db54d 100644 --- a/recipes/el_pais_babelia.recipe +++ b/recipes/el_pais_babelia.recipe @@ -36,7 +36,7 @@ class ElPaisBabelia(BasicNewsRecipe): title = self.tag_to_string(post) if str(post).find('class=') > 0: klass = post['class'] - if klass != "": + if klass != '': self.log() self.log('--> post: ', post) self.log('--> url: ', url) diff --git a/recipes/el_pais_uy.recipe b/recipes/el_pais_uy.recipe index 04a5e6f11c..47be554529 100644 --- a/recipes/el_pais_uy.recipe +++ b/recipes/el_pais_uy.recipe @@ -66,7 +66,7 @@ class General(BasicNewsRecipe): index = 'https://www.elpais.com.uy/impresa/' soup = self.index_to_soup(index) link_item = soup.find('a', attrs={'class': 'page-link link-module'}) - # print link_item + # print(link_item) if link_item: cover_url = 'https://www.elpais.com.uy' + link_item.get('href') return cover_url diff --git a/recipes/elcohetealaluna.recipe b/recipes/elcohetealaluna.recipe index 1ff45144b5..4e33f113ef 100644 --- a/recipes/elcohetealaluna.recipe +++ b/recipes/elcohetealaluna.recipe @@ -28,12 +28,12 @@ class elcohetealaluna(BasicNewsRecipe): compress_news_images = True masthead_url = 
'https://www.elcohetealaluna.com/wp-content/uploads/2018/06/logo-menu.png' - extra_css = """ + extra_css = ''' body{font-family: Georgia, Times, "Times New Roman", serif} h1,h2,.post-author-name{font-family: Oswald, sans-serif} h2{color: gray} img{margin-top:1em; margin-bottom: 1em; display:block} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/elcronista-arg.recipe b/recipes/elcronista-arg.recipe index 9cc1f908ae..4ada20b514 100644 --- a/recipes/elcronista-arg.recipe +++ b/recipes/elcronista-arg.recipe @@ -28,10 +28,10 @@ class ElCronistaArg(BasicNewsRecipe): auto_cleanup_keep = '//div[@class="header-bottom"] | //h1 | //h2' ignore_duplicate_articles = {'url'} masthead_url = 'https://www.cronista.com/export/sites/diarioelcronista/arte/v2/lg_cronista_footer.png_665574830.png' - extra_css = """ + extra_css = ''' body{font-family: 'Source Sans Pro', sans-serif} h1,h2,h3,h4{font-family: 'Libre Baskerville', serif} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/elektroda_pl.recipe b/recipes/elektroda_pl.recipe index 2a5550ae40..d5361c4407 100644 --- a/recipes/elektroda_pl.recipe +++ b/recipes/elektroda_pl.recipe @@ -29,5 +29,5 @@ class Elektroda(BasicNewsRecipe): feeds = BasicNewsRecipe.parse_feeds(self) for feed in feeds: for article in feed.articles[:]: - article.title = article.title[article.title.find("::") + 3:] + article.title = article.title[article.title.find('::') + 3:] return feeds diff --git a/recipes/elmundo.recipe b/recipes/elmundo.recipe index 1432aa1715..21c7f832b0 100644 --- a/recipes/elmundo.recipe +++ b/recipes/elmundo.recipe @@ -35,14 +35,14 @@ class ElMundo(BasicNewsRecipe): articles_are_obfuscated = True auto_cleanup = True temp_files = [] - extra_css = """ + extra_css = ''' body{font-family: "PT serif",Georgia,serif,times} .metadata_noticia{font-size: small} .pestana_GDP{font-size: small; font-weight:bold} h1 {color: #333333; font-family: "Clear Sans Bold",Arial,sans-serif,helvetica} .hora{color: red} .update{color: gray} - """ + ''' conversion_options = { 'comments': description, 'tags': category, 'language': language, 'publisher': publisher @@ -83,14 +83,13 @@ class ElMundo(BasicNewsRecipe): cover = self.masthead_url st = time.localtime() year = str(st.tm_year) - month = "%.2d" % st.tm_mon - day = "%.2d" % st.tm_mday - cover = 'http://img.kiosko.net/' + year + '/' + \ - month + '/' + day + '/es/elmundo.750.jpg' + month = '%.2d' % st.tm_mon + day = '%.2d' % st.tm_mday + cover = 'http://img.kiosko.net/' + '/'.join([year, month, day]) + '/es/elmundo.750.jpg' try: self.browser.open(cover) except: - self.log("\nPortada no disponible") + self.log('\nPortada no disponible') return cover def get_obfuscated_article(self, url): @@ -103,7 +102,7 @@ class ElMundo(BasicNewsRecipe): html = response.read() count = tries except: - print("Retrying download...") + print('Retrying download...') count += 1 if html is not None: tfile = PersistentTemporaryFile('_fa.html') diff --git a/recipes/elpais_semanal.recipe b/recipes/elpais_semanal.recipe index 69a0a0fb42..1ce8ab86e2 100644 --- a/recipes/elpais_semanal.recipe +++ b/recipes/elpais_semanal.recipe @@ -21,7 +21,7 @@ class ElPaisSemanal(BasicNewsRecipe): publication_type = 'magazine' masthead_url = 'http://www.elpais.com/im/tit_logo_int.gif' index = 'http://www.elpais.com/suple/eps/' - extra_css = ' p{text-align: justify} body{ text-align: left; 
font-family: Georgia,"Times New Roman",Times,serif } h2{font-family: Arial,Helvetica,sans-serif} img{margin-bottom: 0.4em} ' # noqa + extra_css = ' p{text-align: justify} body{ text-align: left; font-family: Georgia,"Times New Roman",Times,serif } h2{font-family: Arial,Helvetica,sans-serif} img{margin-bottom: 0.4em} ' # noqa: E501 conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/elperiodico_spanish.recipe b/recipes/elperiodico_spanish.recipe index b6c591c48f..4aa07b9524 100644 --- a/recipes/elperiodico_spanish.recipe +++ b/recipes/elperiodico_spanish.recipe @@ -36,8 +36,8 @@ class ElPeriodico_cat(BasicNewsRecipe): '--comment', description, '--category', category, '--publisher', publisher ] - html2epub_options = 'publisher="' + publisher + \ - '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = ('publisher="' + publisher + + '"\ncomments="' + description + '"\ntags="' + category + '"') feeds = [(u'Portada', u'http://www.elperiodico.com/es/rss/rss_portada.xml'), (u'Internacional', u'http://elperiodico.com/es/rss/internacional/rss.xml'), @@ -66,7 +66,7 @@ class ElPeriodico_cat(BasicNewsRecipe): def preprocess_html(self, soup): mcharset = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) + ('http-equiv', 'Content-Type'), ('content', 'text/html; charset=utf-8')]) soup.head.insert(0, mcharset) for item in soup.findAll(style=True): del item['style'] diff --git a/recipes/en_globes_co_il.recipe b/recipes/en_globes_co_il.recipe index 2ad5aac6af..0dbe822972 100644 --- a/recipes/en_globes_co_il.recipe +++ b/recipes/en_globes_co_il.recipe @@ -18,18 +18,18 @@ class En_Globes_Recipe(BasicNewsRecipe): max_articles_per_feed = 100 feeds = [ - (u"Main Headlines", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederNode?iID=942"), - (u"Israeli stocks on Wall Street", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1392"), - (u"All news", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1725"), - (u"Macro economics", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1389"), - (u"Aerospace and defense", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1380"), - (u"Real estate", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederKeyword?iID=1385"), - (u"Energy and water", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1382"), - (u"Start-ups and venture capital", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1397"), - (u"Financial services", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1383"), - (u"Tel Aviv markets", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1404"), - (u"Healthcare", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1377"), - (u"Telecommunications", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1386"), - (u"Information technology", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1376"), - (u"Transport and infrastructure", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1388"), + (u'Main Headlines', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederNode?iID=942'), + (u'Israeli stocks on Wall Street', 
u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1392'), + (u'All news', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1725'), + (u'Macro economics', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1389'), + (u'Aerospace and defense', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1380'), + (u'Real estate', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederKeyword?iID=1385'), + (u'Energy and water', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1382'), + (u'Start-ups and venture capital', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1397'), + (u'Financial services', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1383'), + (u'Tel Aviv markets', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1404'), + (u'Healthcare', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1377'), + (u'Telecommunications', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1386'), + (u'Information technology', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1376'), + (u'Transport and infrastructure', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1388'), ] diff --git a/recipes/endgadget.recipe b/recipes/endgadget.recipe index de5ef2a941..bb8a3c00b4 100644 --- a/recipes/endgadget.recipe +++ b/recipes/endgadget.recipe @@ -38,33 +38,33 @@ class Engadget(BasicNewsRecipe): remove_tags = [ dict(name='div', attrs={'class':'caas-content-byline-wrapper'}), dict(name='div', attrs={'data-component':'ArticleAuthorInfo'}), - classes('commerce-module caas-header caas-prestige-bottom-share caas-share-buttons caas-da caas-3p-blocked commerce-disclaimer notification-upsell-push article-slideshow athena-button email-form') # noqa + classes('commerce-module caas-header caas-prestige-bottom-share caas-share-buttons caas-da caas-3p-blocked commerce-disclaimer notification-upsell-push article-slideshow athena-button email-form') # noqa: E501 ] feeds = [(u'Posts', u'https://www.engadget.com/rss.xml')] def parse_feeds(self): - # Call parent's method. - feeds = BasicNewsRecipe.parse_feeds(self) - # Loop through all feeds. - for feed in feeds: - # Loop through all articles in feed. - for article in feed.articles[:]: - # Remove articles with '...' in the url. - if '/deals/' in article.url: - print('Removing:',article.title) - feed.articles.remove(article) - # Remove articles with '...' in the title. - elif 'best tech deals' in article.title: - print('Removing:',article.title) - feed.articles.remove(article) - elif 'Podcast' in article.title: - print('Removing:',article.title) - feed.articles.remove(article) - elif 'The Morning After' in article.title: - print('Removing:',article.title) - feed.articles.remove(article) - return feeds + # Call parent's method. + feeds = BasicNewsRecipe.parse_feeds(self) + # Loop through all feeds. + for feed in feeds: + # Loop through all articles in feed. + for article in feed.articles[:]: + # Remove articles with '...' in the url. + if '/deals/' in article.url: + print('Removing:',article.title) + feed.articles.remove(article) + # Remove articles with '...' in the title. 
+ elif 'best tech deals' in article.title: + print('Removing:',article.title) + feed.articles.remove(article) + elif 'Podcast' in article.title: + print('Removing:',article.title) + feed.articles.remove(article) + elif 'The Morning After' in article.title: + print('Removing:',article.title) + feed.articles.remove(article) + return feeds def preprocess_html(self, soup): for attr in 'data-src data-src-mobile'.split(): @@ -87,8 +87,8 @@ class Engadget(BasicNewsRecipe): except KeyError: continue # Reorder the "title" and "content" elements - title_div = soup.find("div", {"class": "caas-title-wrapper"}) - content_div = soup.find("div", {"class": "caas-content-wrapper"}) + title_div = soup.find('div', {'class': 'caas-title-wrapper'}) + content_div = soup.find('div', {'class': 'caas-content-wrapper'}) if title_div and content_div: soup.body.clear() soup.body.append(title_div) diff --git a/recipes/epoch_times.recipe b/recipes/epoch_times.recipe index 26998ea5e3..576676a199 100644 --- a/recipes/epoch_times.recipe +++ b/recipes/epoch_times.recipe @@ -13,7 +13,7 @@ class EpochTimes(BasicNewsRecipe): max_articles_per_feed = 20 ignore_duplicate_articles = {'url'} remove_attributes = ['height', 'width', 'style'] - remove_empty_feeds = True + remove_empty_feeds = True no_stylesheets = True resolve_internal_links = True masthead_url = 'https://epochtimes-ny.newsmemory.com/eeLayout/epochtimes/1.0.a/images/webapp/banner.png' diff --git a/recipes/epw_magazine.recipe b/recipes/epw_magazine.recipe index 105ee34f7c..ff258fbc16 100644 --- a/recipes/epw_magazine.recipe +++ b/recipes/epw_magazine.recipe @@ -72,5 +72,5 @@ class epw(BasicNewsRecipe): if section_title not in feeds: feeds[section_title] = [] feeds[section_title] += articles - ans = [(key, val) for key, val in feeds.items()] + ans = list(feeds.items()) return ans diff --git a/recipes/equestria_daily.recipe b/recipes/equestria_daily.recipe index b0d15db6a9..1efeb7bfe1 100644 --- a/recipes/equestria_daily.recipe +++ b/recipes/equestria_daily.recipe @@ -5,12 +5,12 @@ from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1639926896(BasicNewsRecipe): - __author__ = "Aisteru" - __copyright__ = "2021, Timothée Andres " + __author__ = 'Aisteru' + __copyright__ = '2021, Timothée Andres ' __license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html' - title = "Equestria Daily" - description = "Everything new in Equestria and beyond!" + title = 'Equestria Daily' + description = 'Everything new in Equestria and beyond!' language = 'en' # Max. 
supported by website: 50 @@ -29,13 +29,13 @@ class AdvancedUserRecipe1639926896(BasicNewsRecipe): # To discard posts under a certain section, simply comment the whole line sections = [ - ("Art", 'Art'), - ("News", 'News'), - ("Fics", 'Fanfiction'), - ("Media", 'Media'), - ("Comics", 'Comic'), - ("Community", 'Community'), - ("Editorial", 'Editorial'), + ('Art', 'Art'), + ('News', 'News'), + ('Fics', 'Fanfiction'), + ('Media', 'Media'), + ('Comics', 'Comic'), + ('Community', 'Community'), + ('Editorial', 'Editorial'), ] def get_masthead_url(self): @@ -51,7 +51,7 @@ class AdvancedUserRecipe1639926896(BasicNewsRecipe): lines = description.split('\n') return '\n'.join([line.strip() for line in lines if len(line.strip()) > 0]) - for (section_name, section_url_name) in self.sections: + for section_name, section_url_name in self.sections: soup = self.index_to_soup( f'https://www.equestriadaily.com/search/label/{section_url_name}?max-results={self.max_articles_per_feed}') articles = soup.select('div.post.hentry') diff --git a/recipes/esenja.recipe b/recipes/esenja.recipe index c17537af9c..5836913ab4 100644 --- a/recipes/esenja.recipe +++ b/recipes/esenja.recipe @@ -33,7 +33,6 @@ class Esensja(BasicNewsRecipe): dict(attrs={'class': ['tekst_koniec', 'ref', 'wykop']}), dict(attrs={'itemprop': ['copyrightHolder', 'publisher']}), dict(id='komentarze') - ] extra_css = ''' @@ -45,19 +44,16 @@ class Esensja(BasicNewsRecipe): ''' preprocess_regexps = [(re.compile(r'alt="[^"]*"'), lambda match: ''), - (re.compile( - u'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''), - ] + (re.compile(r'(title|alt)="[^"]*?"', re.DOTALL), lambda match: '')] def parse_index(self): soup = self.index_to_soup('http://www.esensja.pl/magazyn/') - a = soup.find('a', attrs={'href': re.compile('.*/index.html')}) + a = soup.find('a', attrs={'href': re.compile(r'.*/index.html')}) year = a['href'].split('/')[0] month = a['href'].split('/')[1] self.HREF = 'http://www.esensja.pl/magazyn/' + year + '/' + month + '/iso/' soup = self.index_to_soup(self.HREF + '01.html') - self.cover_url = 'http://www.esensja.pl/magazyn/' + \ - year + '/' + month + '/img/ilustr/cover_b.jpg' + self.cover_url = 'http://www.esensja.pl/magazyn/' + year + '/' + month + '/img/ilustr/cover_b.jpg' feeds = [] chapter = '' subchapter = '' @@ -149,7 +145,7 @@ class Esensja(BasicNewsRecipe): info = tag.find(attrs={'class': 'img_info'}) text = str(tag) if not src: - src = re.search('src="[^"]*?"', text) + src = re.search(r'src="[^"]*?"', text) if src: src = src.group(0) src = src[5:].replace('//', '/') diff --git a/recipes/esensja_(rss).recipe b/recipes/esensja_(rss).recipe index f91607702d..1e301e4822 100644 --- a/recipes/esensja_(rss).recipe +++ b/recipes/esensja_(rss).recipe @@ -23,9 +23,7 @@ class EsensjaRSS(BasicNewsRecipe): remove_javascript = True ignore_duplicate_articles = {'title', 'url'} preprocess_regexps = [(re.compile(r'alt="[^"]*"'), lambda match: ''), - (re.compile( - u'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''), - ] + (re.compile(r'(title|alt)="[^"]*?"', re.DOTALL), lambda match: '')] remove_attributes = ['style', 'bgcolor', 'alt', 'color'] keep_only_tags = [dict(attrs={'class': 'sekcja'}), ] remove_tags_after = dict(id='tekst') @@ -53,9 +51,9 @@ class EsensjaRSS(BasicNewsRecipe): def get_cover_url(self): soup = self.index_to_soup(self.INDEX) cover = soup.find(id='panel_1') - self.cover_url = self.INDEX + \ - cover.find('a')['href'].replace( - 'index.html', '') + 'img/ilustr/cover_b.jpg' + self.cover_url = (self.INDEX + + 
cover.find('a')['href'].replace('index.html', '') + + 'img/ilustr/cover_b.jpg') return getattr(self, 'cover_url', self.cover_url) def append_page(self, soup, appendtag): @@ -95,7 +93,7 @@ class EsensjaRSS(BasicNewsRecipe): info = tag.find(attrs={'class': 'img_info'}) text = str(tag) if not src: - src = re.search('src="[^"]*?"', text) + src = re.search(r'src="[^"]*?"', text) if src: src = src.group(0) src = src[5:].replace('//', '/') diff --git a/recipes/eso_pl.recipe b/recipes/eso_pl.recipe index e4e1143c7f..5f0dbb5456 100644 --- a/recipes/eso_pl.recipe +++ b/recipes/eso_pl.recipe @@ -13,7 +13,10 @@ class ESO(BasicNewsRecipe): remove_empty_feeds = True use_embedded_content = False cover_url = 'https://www.eso.org/public/archives/logos/medium/eso-logo-black-outline.jpg' - keep_only_tags = [dict(attrs={'class': 'subcl'})] + keep_only_tags = [ + dict(attrs={'class': 'subcl'}), + dict(name='div',attrs={'class':'col-md-9 left-column'}), + ] remove_tags = [dict(id='lang_row'), dict( attrs={'class': ['pr_typeid', 'pr_news_feature_link', 'outreach_usage', 'hidden']})] feeds = [ @@ -21,9 +24,6 @@ class ESO(BasicNewsRecipe): (u'Og\u0142oszenia', u'http://www.eso.org/public/poland/announcements/feed/'), (u'Zdj\u0119cie tygodnia', u'http://www.eso.org/public/poland/images/potw/feed/')] - keep_only_tags = [ - dict(name='div',attrs={'class':'col-md-9 left-column'})] - def preprocess_html(self, soup): for a in soup.findAll('a', href=True): if a['href'].startswith('/'): diff --git a/recipes/europa_press.recipe b/recipes/europa_press.recipe index 7cbc7c8cb3..7f268d5038 100644 --- a/recipes/europa_press.recipe +++ b/recipes/europa_press.recipe @@ -32,8 +32,8 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe): name='div', attrs={'id': ['ImprimirEnviarNoticia']}) remove_tags = [ - dict(name='ul', attrs={'id': ['entidadesNoticia', 'MenuSecciones']}), dict(name='div', attrs={'id': ['ImprimirEnviarNoticia', 'PublicidadSuperior', 'CabeceraDerecha', 'Comentarios', 'comentarios full fbConnectAPI', 'ComentarEstaNoticia', 'ctl00_Superior_Main_MasEnChance_cajamasnoticias', 'gl_chn', 'videos_portada_derecha', 'galeria_portada_central', 'galeria_portada_central_boxes']}), dict( # noqa - name='div', attrs={'class': ['infoRelacionada', 'col_1', 'buscador', 'caja doblecolumna strong', 'CHANCE_EP_Encuesta_frontal text', 'seccionportada col_0', 'seccion header', 'text', 'pie caption_over']}), dict(name='a', attrs={'class': ['buscadorLabel']}), dict(name='span', attrs={'class': ['editado']}), dict(name='table'), dict(name='li') ] # noqa + dict(name='ul', attrs={'id': ['entidadesNoticia', 'MenuSecciones']}), dict(name='div', attrs={'id': ['ImprimirEnviarNoticia', 'PublicidadSuperior', 'CabeceraDerecha', 'Comentarios', 'comentarios full fbConnectAPI', 'ComentarEstaNoticia', 'ctl00_Superior_Main_MasEnChance_cajamasnoticias', 'gl_chn', 'videos_portada_derecha', 'galeria_portada_central', 'galeria_portada_central_boxes']}), dict( # noqa: E501 + name='div', attrs={'class': ['infoRelacionada', 'col_1', 'buscador', 'caja doblecolumna strong', 'CHANCE_EP_Encuesta_frontal text', 'seccionportada col_0', 'seccion header', 'text', 'pie caption_over']}), dict(name='a', attrs={'class': ['buscadorLabel']}), dict(name='span', attrs={'class': ['editado']}), dict(name='table'), dict(name='li')] # noqa: E501 feeds = [ diff --git a/recipes/expansion_spanish.recipe b/recipes/expansion_spanish.recipe index 674e9f699f..bc72f2637f 100644 --- a/recipes/expansion_spanish.recipe +++ b/recipes/expansion_spanish.recipe @@ -37,7 +37,7 @@ class 
expansion_spanish(BasicNewsRecipe): remove_tags = [ dict(name='div', attrs={'class': ['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info', - 'publicidad publicidad_textlink', 'ampliarfoto', 'tit_relacionadas', 'interact', 'paginacion estirar', 'sumario derecha']}), # noqa + 'publicidad publicidad_textlink', 'ampliarfoto', 'tit_relacionadas', 'interact', 'paginacion estirar', 'sumario derecha']}), # noqa: E501 dict(name='ul', attrs={ 'class': ['bolos_desarrollo_noticia', 'not_logged']}), dict(name='span', attrs={'class': ['comentarios']}), @@ -106,15 +106,14 @@ class expansion_spanish(BasicNewsRecipe): cover = None st = time.localtime() year = str(st.tm_year) - month = "%.2d" % st.tm_mon - day = "%.2d" % st.tm_mday - cover = 'http://img5.kiosko.net/' + year + '/' + \ - month + '/' + day + '/es/expansion.750.jpg' + month = '%.2d' % st.tm_mon + day = '%.2d' % st.tm_mday + cover = 'http://img5.kiosko.net/' + '/'.join([year, month, day]) + '/es/expansion.750.jpg' br = BasicNewsRecipe.get_browser(self) try: br.open(cover) except: - self.log("\nPortada no disponible") + self.log('\nPortada no disponible') cover = 'http://www.aproahp.org/enlaces/images/diario_expansion.gif' return cover @@ -138,13 +137,13 @@ class expansion_spanish(BasicNewsRecipe): link = article.get('link', None) if link is None: return article - if link.split('/')[-1] == "story01.htm": + if link.split('/')[-1] == 'story01.htm': link = link.split('/')[-2] a = ['0B', '0C', '0D', '0E', '0F', '0G', '0N', '0L0S', '0A'] b = ['.', '/', '?', '-', '=', '&', '.com', 'www.', '0'] - for i in range(0, len(a)): + for i in range(len(a)): link = link.replace(a[i], b[i]) - link = "http://" + link + link = 'http://' + link # Eliminar artículos duplicados en otros feeds diff --git a/recipes/factcheck.recipe b/recipes/factcheck.recipe index 732344ebd0..f7fad0d34c 100644 --- a/recipes/factcheck.recipe +++ b/recipes/factcheck.recipe @@ -13,6 +13,6 @@ class FactCheckOrg(BasicNewsRecipe): masthead_url = 'http://factcheck.org/wp-content/themes/Streamline/images/headernew.jpg' cover_url = 'http://factcheck.org/wp-content/themes/Streamline/images/headernew.jpg' - remove_tags = [dict({'id': ['footer', 'footerabout', 'sidebar']})] + remove_tags = [{'id': ['footer', 'footerabout', 'sidebar']}] feeds = [(u'Factcheck', u'feed://www.factcheck.org/feed/')] diff --git a/recipes/fairbanks_daily.recipe b/recipes/fairbanks_daily.recipe index 6435406cb5..f72f612891 100644 --- a/recipes/fairbanks_daily.recipe +++ b/recipes/fairbanks_daily.recipe @@ -13,7 +13,6 @@ class FairbanksDailyNewsminer(BasicNewsRecipe): remove_javascript = True use_embedded_content = False no_stylesheets = True - language = 'en' encoding = 'utf8' conversion_options = {'linearize_tables': True} auto_cleanup = True diff --git a/recipes/fastcompany.recipe b/recipes/fastcompany.recipe index c867c7b8ae..84af8ec54c 100644 --- a/recipes/fastcompany.recipe +++ b/recipes/fastcompany.recipe @@ -45,15 +45,15 @@ class FastCompany(BasicNewsRecipe): feeds = [(u'All News', u'http://feeds.feedburner.com/fastcompany/headlines')] def get_article_url(self, article): - return article.get('guid', None) + return article.get('guid', None) def preprocess_html(self, soup): soup.html['xml:lang'] = self.lang soup.html['lang'] = self.lang mlang = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Language"), ("content", self.lang)]) + ('http-equiv', 'Content-Language'), ('content', self.lang)]) mcharset = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) 
+ ('http-equiv', 'Content-Type'), ('content', 'text/html; charset=UTF-8')]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) for item in soup.findAll('a'): diff --git a/recipes/faz_net.recipe b/recipes/faz_net.recipe index 90b789a2c4..e1ac34f1b4 100644 --- a/recipes/faz_net.recipe +++ b/recipes/faz_net.recipe @@ -18,22 +18,22 @@ def format_tickaroo_liveblog(soup): for div in soup.findAll('div', attrs={'class':'tik4-content-block tik4-content-block--rich-text tik4-content-block--position-2'}): div.insert_before(soup.new_tag('br')) - - #format liveblogs + # format liveblogs for tag in soup.findAll('time'): - ntag = soup.new_tag("br") + ntag = soup.new_tag('br') tag.insert_before(ntag) - for tag in soup.findAll(class_ = 'tik4-author__wrapper'): - ntag = tag.find(class_ = 'tik4-author__name') + for tag in soup.findAll(class_='tik4-author__wrapper'): + ntag = tag.find(class_='tik4-author__name') if ntag: temp = ntag.extract() temp['class'] = 'tik4-media-body__title' - ntag = tag.find(class_ = 'tik4-author__thumb') + ntag = tag.find(class_='tik4-author__thumb') if ntag and temp: ntag.insert_after(temp) - # process run of images + +# process run of images def bilderstrecke(soup,tag): flag = False try: @@ -56,19 +56,19 @@ def bilderstrecke(soup,tag): for v in struct: if isinstance(v, dict) and 'caption' in v and 'defaultUrl' in v: -# if type(struct[i-1])== str: -# head = soup.new_tag("h4") -# head.append(struct[i-1]) + # if type(struct[i-1])== str: + # head = soup.new_tag("h4") + # head.append(struct[i-1]) cap = soup.new_tag('p') cap.append(struct[int(v['caption'])]) - cap['class'] = "body-elements__image-figcaption" + cap['class'] = 'body-elements__image-figcaption' if 'source' in v.keys(): cred = soup.new_tag('span') cred.append(struct[int(v['source'])]) - cred['class'] = "body-elements__image-figcaption--source" + cred['class'] = 'body-elements__image-figcaption--source' cap.append(cred) if 'defaultUrl' in v.keys(): - fig = soup.new_tag("figure") + fig = soup.new_tag('figure') img = soup.new_tag('img') img['src'] = struct[int(v['defaultUrl'])] fig.append(img) @@ -76,10 +76,10 @@ def bilderstrecke(soup,tag): collect.append(fig) soup.find(class_='header-teaser').insert_after(collect) - for tag in soup.findAll(class_='header-teaser__image--default'): tag.extract() + def story(soup,tag): first_image = soup.find('img',attrs={'loading':'lazy'}) first_caption = soup.find('figcaption',attrs={'class':'caption'}) @@ -93,25 +93,24 @@ class FazNet(BasicNewsRecipe): # original by Armin Geller # overhaul to deal with changes in the faz.net websites - title = 'FAZ.NET' - __author__ = 'Unknown' - description = 'Frankfurter Allgemeine Zeitung' - publisher = 'Frankfurter Allgemeine Zeitung GmbH' - category = 'news, politics, Germany' - cover_url = 'https://upload.wikimedia.org/wikipedia/commons/7/72/Frankfurter_Allgemeine_logo.svg' - encoding = 'utf-8' - language = 'de' - ignore_duplicate_articles = {'title', 'url'} - max_articles_per_feed = 30 - no_stylesheets = True - remove_javascript = True - scale_news_images = (10,100) - delay = 1 + title = 'FAZ.NET' + __author__ = 'Unknown' + description = 'Frankfurter Allgemeine Zeitung' + publisher = 'Frankfurter Allgemeine Zeitung GmbH' + category = 'news, politics, Germany' + cover_url = 'https://upload.wikimedia.org/wikipedia/commons/7/72/Frankfurter_Allgemeine_logo.svg' + encoding = 'utf-8' + language = 'de' + ignore_duplicate_articles = {'title', 'url'} + max_articles_per_feed = 30 + no_stylesheets = True + remove_javascript = True + scale_news_images = 
(10,100) + delay = 1 - test_article = 'https://www.faz.net/rss/aktuell/feuilleton/kunst-und-architektur/berlinische-galerie-zeigt-edvard-munch-die-ganze-gefuehlsskala-des-lebens-19180631.html?printPagedArticle=true#pageIndex_2' - test_article = None + test_feed = 'https://www.faz.net/rss/aktuell/feuilleton/kunst-und-architektur/berlinische-galerie-zeigt-edvard-munch-die-ganze-gefuehlsskala-des-lebens-19180631.html?printPagedArticle=true#pageIndex_2' - extra_css = ''' + extra_css = ''' .header-title,.scrolly-title {font-size: 1.5em; font-weight:bold; text-align:left;} .quote {font-size: 1.5em; font-weight:bold; text-align:center;} .author {font-size: 0.7em; font-weight:bold; text-align:center; display:block; @@ -132,11 +131,10 @@ class FazNet(BasicNewsRecipe): keep_only_tags = [dict(name='article', attrs={'class':['article','storytelling']}), dict(name='body'), dict(name='div', attrs={'class':['imageGallery','image_only']}), - dict(name = 'div', attrs ={'class':'tik4-live__container'}), - dict(name = 'script', attrs = {'id':'__NUXT_DATA__'}), + dict(name='div', attrs={'class':'tik4-live__container'}), + dict(name='script', attrs={'id':'__NUXT_DATA__'}), ] - remove_tags = [ dict(name='div', attrs={'class':[ 'related-articles','consent-placeholder', @@ -144,8 +142,8 @@ class FazNet(BasicNewsRecipe): 'tik4-sharing','tik4-load-more-bottom', 'tik4-by','header-detail__image','mm-adbox','upper-toolbar content-container' ]}), - # dict(name ='script'), - dict(name = "style"), + # dict(name ='script'), + dict(name='style'), dict(name='svg'), dict(name='div', attrs={'data-module':'teaser'}), @@ -153,7 +151,6 @@ class FazNet(BasicNewsRecipe): remove_attributes = ['onclick'] - test_article = False if not test_article: feeds = [ @@ -174,11 +171,11 @@ class FazNet(BasicNewsRecipe): else: def parse_index(self): test_article = 'https://www.faz.net/aktuell/stil/mode-im-em-jahr-wir-zeigen-wie-fussball-und-mode-zusammengehoeren-19766969.html' -# test_article = 'https://www.faz.net/aktuell/feuilleton/buecher/film-eruption-ein-thriller-aus-dem-nachlass-von-michael-crichton-19770491.html' -# test_article = 'https://www.faz.net/aktuell/stil/mode-design/leonie-benesch-sandra-hueller-ist-eine-meiner-heldinnen-19671638.html' -# test_article = 'https://www.faz.net/aktuell/feuilleton/medien/sabine-postel-zum-siebzigsten-die-briten-nannten-sie-german-traktor-19708409.html' -# test_article = 'https://www.faz.net/aktuell/stil/mode-design/von-richert-beil-bis-william-fan-wer-kauft-denn-das-19666592.html' - # test_article = 'https://www.faz.net/aktuell/feuilleton/buecher/rezensionen/sachbuch/tom-mustills-buch-die-sprache-der-wale-19657782.html' + # test_article = 'https://www.faz.net/aktuell/feuilleton/buecher/film-eruption-ein-thriller-aus-dem-nachlass-von-michael-crichton-19770491.html' + # test_article = 'https://www.faz.net/aktuell/stil/mode-design/leonie-benesch-sandra-hueller-ist-eine-meiner-heldinnen-19671638.html' + # test_article = 'https://www.faz.net/aktuell/feuilleton/medien/sabine-postel-zum-siebzigsten-die-briten-nannten-sie-german-traktor-19708409.html' + # test_article = 'https://www.faz.net/aktuell/stil/mode-design/von-richert-beil-bis-william-fan-wer-kauft-denn-das-19666592.html' + # test_article = 'https://www.faz.net/aktuell/feuilleton/buecher/rezensionen/sachbuch/tom-mustills-buch-die-sprache-der-wale-19657782.html' if test_article: return [('Articles', [{'title': 'Test article', 'url': test_article}])] soup = self.index_to_soup(self.INDEX) @@ -211,13 +208,13 @@ class FazNet(BasicNewsRecipe): if tag: 
story(soup,tag) - #Extract images and text from image galleries + # Extract images and text from image galleries for par in soup.findAll('p'): if len(par.contents) == 1: cont = str(par.contents[0]) - if re.search(r"^[1-9]\d* Bilder$",cont): -# print(cont) - for tag in soup.findAll('script',attrs={'id':"__NUXT_DATA__",'type':'application/json'}): + if re.search(r'^[1-9]\d* Bilder$',cont): + # print(cont) + for tag in soup.findAll('script',attrs={'id':'__NUXT_DATA__','type':'application/json'}): bilderstrecke(soup,tag) break break @@ -227,14 +224,14 @@ class FazNet(BasicNewsRecipe): tag.unwrap() # remove ":"" - tag = soup.find(class_ ="header-label__content") + tag = soup.find(class_='header-label__content') if tag: - colon=tag.find(class_ ="sr-only") + colon=tag.find(class_='sr-only') if colon: colon.extract() # Skip articles behind paywall - if soup.find(id = "faz-paywall"): + if soup.find(id='faz-paywall'): self.abort_article('Skipping paywalled article') # Remove F.A.Z. ad @@ -242,11 +239,11 @@ class FazNet(BasicNewsRecipe): if tag.contents[0] and 'F.A.Z.-Newsletter' in tag.contents[0]: tag.extract() -# format liveblog + # format liveblog if soup.find(attrs={'class':'tik4-live__container'}): - format_tickaroo_liveblog(soup) + format_tickaroo_liveblog(soup) -# remove sizes and calc attributes in images + # remove sizes and calc attributes in images for tag in soup.findAll('img'): if tag.has_attr('src'): new_img = soup.new_tag('img') @@ -262,7 +259,7 @@ class FazNet(BasicNewsRecipe): def postprocess_html(self, soup, first_fetch): - #Position point between figure caption and figure credit, where needed + # Position point between figure caption and figure credit, where needed for tag in soup.findAll(attrs={'class':['body-elements__image-figcaption','header-teaser__image-details']}): if tag.string is None: if tag.contents[0].string: @@ -271,5 +268,5 @@ class FazNet(BasicNewsRecipe): text = str(tag.string) text = text.strip() if text != '' and text[-1] not in ['.','?','!',':']: - tag.string.replace_with(text + ".") + tag.string.replace_with(text + '.') return self.adeify_images(soup) diff --git a/recipes/film_org_pl.recipe b/recipes/film_org_pl.recipe index cc278a8517..e08abbd25c 100644 --- a/recipes/film_org_pl.recipe +++ b/recipes/film_org_pl.recipe @@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class FilmOrgPl(BasicNewsRecipe): title = u'Film.org.pl' __author__ = 'fenuks' - description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce." # noqa + description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce." # noqa: E501 category = 'film' language = 'pl' cover_url = 'http://film.org.pl/wp-content/uploads/2015/02/film.org.pl_film.org_.pl_kmfviolet4.png' diff --git a/recipes/film_web.recipe b/recipes/film_web.recipe index 77c57458be..da8eede0b3 100644 --- a/recipes/film_web.recipe +++ b/recipes/film_web.recipe @@ -23,7 +23,7 @@ class FilmWebPl(BasicNewsRecipe): 'ul.sep-line > li + li::before {content: " | "} ' 'ul.inline {padding:0px;} .vertical-align {display: inline-block;}') preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda match: ''), # fix malformed HTML with 2 body tags... 
- (re.compile(u'(?:<sup>)?\\(kliknij\\,\\ aby powiększyć\\)(?:</sup>)?', re.IGNORECASE), lambda m: ''), + (re.compile(u'(?:<sup>)?\\(kliknij\\,\\ aby powiększyć\\)(?:</sup>)?', re.IGNORECASE), lambda m: ''), # noqa: RUF039 (re.compile(type(u'')(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />
') ] remove_tags = [dict(attrs={'class':['infoParent', 'likeBar', diff --git a/recipes/financial_times.recipe b/recipes/financial_times.recipe index 337f6ea949..5df77f095d 100644 --- a/recipes/financial_times.recipe +++ b/recipes/financial_times.recipe @@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes class ft(BasicNewsRecipe): title = 'Financial Times' language = 'en' - __author__ = "Kovid Goyal" + __author__ = 'Kovid Goyal' description = 'The Financial Times is one of the world’s leading news organisations, recognised internationally for its authority, integrity and accuracy.' oldest_article = 1.15 max_articles_per_feed = 50 diff --git a/recipes/financialsense.recipe b/recipes/financialsense.recipe index fdf45c45b8..6bc1bc23aa 100644 --- a/recipes/financialsense.recipe +++ b/recipes/financialsense.recipe @@ -22,12 +22,12 @@ class FinancialSense(BasicNewsRecipe): remove_empty_feeds = True publication_type = 'newsportal' masthead_url = 'http://www.financialsense.com/sites/default/files/logo.jpg' - extra_css = """ + extra_css = ''' body{font-family: Arial,"Helvetica Neue",Helvetica,sans-serif } img{margin-bottom: 0.4em; display:block} h2{color: gray} .name{margin-right: 5em} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/financiarul.recipe b/recipes/financiarul.recipe index 0f96e054b5..cd601b30b1 100644 --- a/recipes/financiarul.recipe +++ b/recipes/financiarul.recipe @@ -33,7 +33,7 @@ class Financiarul(BasicNewsRecipe): ] remove_tags = [ - dict(name='div', attrs={'class': ['infoArticol']}), dict(name='ul', attrs={'class': 'navSectiuni'}), dict(name='div', attrs={'class': 'separator separatorTop'}), dict(name='div', attrs={'class': 'infoArticol infoArticolBottom'}), dict(name='ul', attrs={'class': ['related']}), dict(name='div', attrs={'class': ['slot panel300 panelGri300 panelGri300s panelGri300sm']}) # noqa + dict(name='div', attrs={'class': ['infoArticol']}), dict(name='ul', attrs={'class': 'navSectiuni'}), dict(name='div', attrs={'class': 'separator separatorTop'}), dict(name='div', attrs={'class': 'infoArticol infoArticolBottom'}), dict(name='ul', attrs={'class': ['related']}), dict(name='div', attrs={'class': ['slot panel300 panelGri300 panelGri300s panelGri300sm']}) # noqa: E501 ] remove_tags_after = [ diff --git a/recipes/first_things.recipe b/recipes/first_things.recipe index c044afa125..17d6a18eb2 100644 --- a/recipes/first_things.recipe +++ b/recipes/first_things.recipe @@ -16,7 +16,7 @@ class FirstThings(BasicNewsRecipe): title = 'First Things' __author__ = 'John Hutson' - description = 'America\'s Most Influential Journal of Religion and Public Life' + description = "America's Most Influential Journal of Religion and Public Life" INDEX = 'https://www.firstthings.com/current-edition' language = 'en' encoding = 'utf-8' diff --git a/recipes/firstpost.recipe b/recipes/firstpost.recipe index 75e62501a4..2d0c7e7d19 100644 --- a/recipes/firstpost.recipe +++ b/recipes/firstpost.recipe @@ -41,7 +41,7 @@ class firstpost(BasicNewsRecipe): # 'photos', 'entertainment', 'living', 'education', 'sports', 'firstcricket', ] - oldest_article = 1.2 # days + oldest_article = 1.2 # days for sec in sections: a = 'https://www.firstpost.com/rss/{}.xml' feeds.append((sec.capitalize(), a.format(sec))) diff --git a/recipes/flickr.recipe b/recipes/flickr.recipe index 63a1b9e66f..474648932e 100644 --- a/recipes/flickr.recipe +++ b/recipes/flickr.recipe @@ -31,13 +31,13 @@ class 
AdvancedUserRecipe1297031650(BasicNewsRecipe): remove_javascript = True language = 'en' - extra_css = """ + extra_css = ''' p{text-align: justify; font-size: 100%} body{ text-align: left; font-size:100% } h2{font-family: sans-serif; font-size:130%; font-weight:bold; text-align: justify; } .published{font-family:Arial,Helvetica,sans-serif; font-size:80%; } .posted{font-family:Arial,Helvetica,sans-serif; font-size:80%; } - """ + ''' keep_only_tags = [ dict(name='div', attrs={'class': 'entry'}) diff --git a/recipes/flickr_es.recipe b/recipes/flickr_es.recipe index 377e7f154a..b6049c7b2a 100644 --- a/recipes/flickr_es.recipe +++ b/recipes/flickr_es.recipe @@ -31,13 +31,13 @@ class AdvancedUserRecipe1297031650(BasicNewsRecipe): remove_javascript = True language = 'es' - extra_css = """ + extra_css = ''' p{text-align: justify; font-size: 100%} body{ text-align: left; font-size:100% } h2{font-family: sans-serif; font-size:130%; font-weight:bold; text-align: justify; } .published{font-family:Arial,Helvetica,sans-serif; font-size:80%; } .posted{font-family:Arial,Helvetica,sans-serif; font-size:80%; } - """ + ''' keep_only_tags = [ dict(name='div', attrs={'class': 'entry'}) diff --git a/recipes/fluter_de.recipe b/recipes/fluter_de.recipe index 508ae4de6f..525ce1706f 100644 --- a/recipes/fluter_de.recipe +++ b/recipes/fluter_de.recipe @@ -20,13 +20,13 @@ class AdvancedUserRecipe1313693926(BasicNewsRecipe): language = 'de' encoding = 'UTF-8' - __author__ = 'Armin Geller' # 2013-02-05 V3 + __author__ = 'Armin Geller' # 2013-02-05 V3 oldest_article = 14 max_articles_per_feed = 50 auto_cleanup = False - feeds = [ + feeds = [ (u'Inhalt:', u'https://www.fluter.de/rss.xml') ] @@ -38,6 +38,6 @@ class AdvancedUserRecipe1313693926(BasicNewsRecipe): dict(name='h2', attrs={'class':'element-invisible'}) ] - extra_css = ''' + extra_css = ''' .field-group-format, .group_additional_info, .additional-info {display: inline-block; min-width: 8rem; text-align: center} ''' diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe index 20321d5efe..9559086188 100644 --- a/recipes/fokus.recipe +++ b/recipes/fokus.recipe @@ -74,7 +74,7 @@ class Fokus(BasicNewsRecipe): return br def get_web_sections(self, main_url: str) -> dict[str, str]: - """Return a dict of (1) section URL and (2) section name key-value pairs found at `main_url`. + '''Return a dict of (1) section URL and (2) section name key-value pairs found at `main_url`. For example, if the Fokus website currently includes an 'Aktuellt' section, the dict should include an entry on the form: `{'https://www.fokus.se/aktuellt': 'Aktuellt'}`. @@ -84,7 +84,7 @@ class Fokus(BasicNewsRecipe): Yields: dict[str, str]: (1) URLs and (2) human-readable names of Fokus sections. - """ + ''' self.log(f"Identifying all sections under '{main_url}'...") soup = self.index_to_soup(main_url) @@ -110,14 +110,14 @@ class Fokus(BasicNewsRecipe): return section_urls_and_names def parse_article_blurb(self, article_blurb) -> dict[str, str, str, str] | None: - """Given a
tag of class 'Blurb', parse it into a dict. + '''Given a
tag of class 'Blurb', parse it into a dict. Args: article_blurb (Tag): An
<article> tag of class 'Blurb', parse it into a dict. + '''Given a <article> tag of class 'Blurb', parse it into a dict. Args: article_blurb (Tag): An <article>
b/recipes/gazeta_pl_warszawa.recipe @@ -33,7 +33,7 @@ class gw_wawa(BasicNewsRecipe): # rules for gazeta.pl preprocess_regexps = [ - (re.compile(u'Czytaj więcej.*', re.DOTALL), lambda m: '')] + (re.compile(u'Czytaj więcej.*', re.DOTALL), lambda m: '')] # noqa: RUF039 keep_only_tags = [dict(id='gazeta_article')] remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict( attrs={'class': ['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})] diff --git a/recipes/gazetaua_ru.recipe b/recipes/gazetaua_ru.recipe index 5ca2113039..f3a3c22476 100644 --- a/recipes/gazetaua_ru.recipe +++ b/recipes/gazetaua_ru.recipe @@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class GazetaUA(BasicNewsRecipe): title = 'Gazeta.ua' __author__ = 'bugmen00t' - description = '\u0412\u0441\u0435\u0443\u043A\u0440\u0430\u0438\u043D\u0441\u043A\u0430\u044F \u043E\u043D-\u043B\u0430\u0439\u043D \u0433\u0430\u0437\u0435\u0442\u0430. \u041E\u043F\u0435\u0440\u0430\u0442\u0438\u0432\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0432 \u0423\u043A\u0440\u0430\u0438\u043D\u0435 \u0438 \u0432 \u043C\u0438\u0440\u0435: \u0441\u043E\u0431\u044B\u0442\u0438\u044F, \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0430, \u0441\u043F\u043E\u0440\u0442, \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u0430.' # noqa - publisher = '\u0418\u0437\u0434\u0430\u0442\u0435\u043B\u044C\u0441\u043A\u0430\u044F \u0433\u0440\u0443\u043F\u043F\u0430 "\u041D\u043E\u0432\u0430 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u044F"' # noqa + description = '\u0412\u0441\u0435\u0443\u043A\u0440\u0430\u0438\u043D\u0441\u043A\u0430\u044F \u043E\u043D-\u043B\u0430\u0439\u043D \u0433\u0430\u0437\u0435\u0442\u0430. \u041E\u043F\u0435\u0440\u0430\u0442\u0438\u0432\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0432 \u0423\u043A\u0440\u0430\u0438\u043D\u0435 \u0438 \u0432 \u043C\u0438\u0440\u0435: \u0441\u043E\u0431\u044B\u0442\u0438\u044F, \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0430, \u0441\u043F\u043E\u0440\u0442, \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u0430.' # noqa: E501 + publisher = '\u0418\u0437\u0434\u0430\u0442\u0435\u043B\u044C\u0441\u043A\u0430\u044F \u0433\u0440\u0443\u043F\u043F\u0430 "\u041D\u043E\u0432\u0430 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u044F"' # noqa: E501 category = 'newspaper' cover_url = u'https://static2.gazeta.ua/img2/cache/preview/1064/1064219_w_450.jpg' language = 'ru_UK' @@ -23,7 +23,7 @@ class GazetaUA(BasicNewsRecipe): remove_tags_after = dict(name='section', attrs={'class': 'article-content clearfix'}) - remove_tags = [ + remove_tags = [ dict(name='div', attrs={'class': 'mt5'}), dict(name='div', attrs={'class': 'interview-block'}), dict(name='p', attrs={'id': 'mce_0'}), diff --git a/recipes/gazetaua_ua.recipe b/recipes/gazetaua_ua.recipe index 2c9dfdb70c..02a50c9a59 100644 --- a/recipes/gazetaua_ua.recipe +++ b/recipes/gazetaua_ua.recipe @@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class GazetaUA(BasicNewsRecipe): title = 'Gazeta.ua' __author__ = 'bugmen00t' - description = '\u0412\u0441\u0435\u0443\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430 \u043E\u043D-\u043B\u0430\u0439\u043D \u0433\u0430\u0437\u0435\u0442\u0430. 
\u041E\u043F\u0435\u0440\u0430\u0442\u0438\u0432\u043D\u0456 \u043D\u043E\u0432\u0438\u043D\u0438 \u0437 \u0423\u043A\u0440\u0430\u0457\u043D\u0438 \u0456 \u0441\u0432\u0456\u0442\u0443: \u043F\u043E\u0434\u0456\u0457, \u043F\u043E\u043B\u0456\u0442\u0438\u043A\u0430, \u0441\u043F\u043E\u0440\u0442, \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u0430.' # noqa - publisher = '\u0422\u041E\u0412 "\u0412\u0438\u0434\u0430\u0432\u043D\u0438\u0447\u0430 \u043A\u043E\u043C\u043F\u0430\u043D\u0456\u044F "\u041D\u043E\u0432\u0430 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u044F"' # noqa + description = '\u0412\u0441\u0435\u0443\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430 \u043E\u043D-\u043B\u0430\u0439\u043D \u0433\u0430\u0437\u0435\u0442\u0430. \u041E\u043F\u0435\u0440\u0430\u0442\u0438\u0432\u043D\u0456 \u043D\u043E\u0432\u0438\u043D\u0438 \u0437 \u0423\u043A\u0440\u0430\u0457\u043D\u0438 \u0456 \u0441\u0432\u0456\u0442\u0443: \u043F\u043E\u0434\u0456\u0457, \u043F\u043E\u043B\u0456\u0442\u0438\u043A\u0430, \u0441\u043F\u043E\u0440\u0442, \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u0430.' # noqa: E501 + publisher = '\u0422\u041E\u0412 "\u0412\u0438\u0434\u0430\u0432\u043D\u0438\u0447\u0430 \u043A\u043E\u043C\u043F\u0430\u043D\u0456\u044F "\u041D\u043E\u0432\u0430 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u044F"' # noqa: E501 category = 'newspaper' cover_url = u'https://static2.gazeta.ua/img2/cache/preview/1064/1064219_w_450.jpg' language = 'uk' @@ -23,7 +23,7 @@ class GazetaUA(BasicNewsRecipe): remove_tags_after = dict(name='section', attrs={'class': 'article-content clearfix'}) - remove_tags = [ + remove_tags = [ dict(name='div', attrs={'class': 'mt5'}), dict(name='div', attrs={'class': 'interview-block'}), dict(name='p', attrs={'id': 'mce_0'}), diff --git a/recipes/geekcity.recipe b/recipes/geekcity.recipe index ba1fae13f5..4e8aa97845 100644 --- a/recipes/geekcity.recipe +++ b/recipes/geekcity.recipe @@ -6,7 +6,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class GeekCity(BasicNewsRecipe): title = u'GeekCity' - description = u'\u041F\u043E\u0440\u0442\u0430\u043B \u043E \u043A\u043E\u043C\u0438\u043A\u0441\u0430\u0445, \u043A\u0438\u043D\u043E, \u0441\u0435\u0440\u0438\u0430\u043B\u0430\u0445, \u0438\u0433\u0440\u0430\u0445, sci-fi \u0438 \u043C\u043D\u043E\u0433\u043E\u043C \u0434\u0440\u0443\u0433\u043E\u043C. \u0417\u0434\u0435\u0441\u044C \u0432\u044B \u0432\u0441\u0435\u0433\u0434\u0430 \u0441\u043C\u043E\u0436\u0435\u0442\u0435 \u043D\u0430\u0439\u0442\u0438 \u0441\u0432\u0435\u0436\u0438\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u043E \u0441\u0430\u043C\u044B\u0445 \u0432\u0430\u0436\u043D\u044B\u0445 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u0445 \u0432 \u043C\u0438\u0440\u0435 \u0433\u0438\u043A\u043E\u0432, \u0430\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u044B\u0435 \u043E\u0431\u0437\u043E\u0440\u044B, \u0433\u0430\u0439\u0434\u044B, \u0441\u0442\u0430\u0442\u044C\u0438 \u0438 \u043C\u043D\u043E\u0433\u043E\u0435 \u0434\u0440\u0443\u0433\u043E\u0435.' # noqa + description = u'\u041F\u043E\u0440\u0442\u0430\u043B \u043E \u043A\u043E\u043C\u0438\u043A\u0441\u0430\u0445, \u043A\u0438\u043D\u043E, \u0441\u0435\u0440\u0438\u0430\u043B\u0430\u0445, \u0438\u0433\u0440\u0430\u0445, sci-fi \u0438 \u043C\u043D\u043E\u0433\u043E\u043C \u0434\u0440\u0443\u0433\u043E\u043C. 
\u0417\u0434\u0435\u0441\u044C \u0432\u044B \u0432\u0441\u0435\u0433\u0434\u0430 \u0441\u043C\u043E\u0436\u0435\u0442\u0435 \u043D\u0430\u0439\u0442\u0438 \u0441\u0432\u0435\u0436\u0438\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u043E \u0441\u0430\u043C\u044B\u0445 \u0432\u0430\u0436\u043D\u044B\u0445 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u0445 \u0432 \u043C\u0438\u0440\u0435 \u0433\u0438\u043A\u043E\u0432, \u0430\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u044B\u0435 \u043E\u0431\u0437\u043E\u0440\u044B, \u0433\u0430\u0439\u0434\u044B, \u0441\u0442\u0430\u0442\u044C\u0438 \u0438 \u043C\u043D\u043E\u0433\u043E\u0435 \u0434\u0440\u0443\u0433\u043E\u0435.' # noqa: E501 __author__ = 'bugmen00t' publisher = 'GeekCity.ru' publication_type = 'blog' @@ -21,7 +21,7 @@ class GeekCity(BasicNewsRecipe): remove_tags_after = dict(name='article') - remove_tags = [ + remove_tags = [ dict(name='div', attrs={'class': 'term-badges floated'}), dict(name='div', attrs={'class': 'post-meta single-post-meta'}), dict(name='div', attrs={'class': 'post-share single-post-share top-share clearfix style-1'}), diff --git a/recipes/github.recipe b/recipes/github.recipe index 1024624ffc..6eae771bd6 100644 --- a/recipes/github.recipe +++ b/recipes/github.recipe @@ -13,13 +13,13 @@ class GithubBlog(BasicNewsRecipe): oldest_article = 14 keep_only_tags = [ - dict(name='div', attrs={'class': 'col-12 offset-lg-1 col-lg-10 col-xl-7 mt-5 mt-lg-10 mb-6 mb-lg-8' }), - dict(name='section', attrs={'class': lambda x: x and 'post__content' in x.split(' ') }) + dict(name='div', attrs={'class': 'col-12 offset-lg-1 col-lg-10 col-xl-7 mt-5 mt-lg-10 mb-6 mb-lg-8'}), + dict(name='section', attrs={'class': lambda x: x and 'post__content' in x.split(' ')}) ] remove_tags = [ - dict(name='div', attrs={'class': lambda x: x and 'post-tags' in x.split(' ') }), - dict(name='ul', attrs={'class': lambda x: x and 'post-hero__categories' in x.split(' ') }) + dict(name='div', attrs={'class': lambda x: x and 'post-tags' in x.split(' ')}), + dict(name='ul', attrs={'class': lambda x: x and 'post-hero__categories' in x.split(' ')}) ] preprocess_regexps = [ diff --git a/recipes/glasgow_herald.recipe b/recipes/glasgow_herald.recipe index 388dff9783..fd48f22e84 100644 --- a/recipes/glasgow_herald.recipe +++ b/recipes/glasgow_herald.recipe @@ -11,7 +11,6 @@ class GlasgowHerald(BasicNewsRecipe): title = u'Glasgow Herald' oldest_article = 1 max_articles_per_feed = 100 - no_stylesheets = True language = 'en_GB' __author__ = 'Kovid Goyal' diff --git a/recipes/globaltimes.recipe b/recipes/globaltimes.recipe index e7d808c1ad..c33c67762d 100644 --- a/recipes/globaltimes.recipe +++ b/recipes/globaltimes.recipe @@ -7,6 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes index = 'https://www.globaltimes.cn/' + class GlobalTimes(BasicNewsRecipe): title = 'Global Times' __author__ = 'unkn0wn' @@ -19,7 +20,7 @@ class GlobalTimes(BasicNewsRecipe): encoding = 'utf-8' remove_empty_feeds = True resolve_internal_links = True - oldest_article = 1 # days + oldest_article = 1 # days def get_cover_url(self): soup = self.index_to_soup('https://en.kiosko.net/cn/np/cn_global_times.html') diff --git a/recipes/globes_co_il.recipe b/recipes/globes_co_il.recipe index ffd5c71f9d..8102642749 100644 --- a/recipes/globes_co_il.recipe +++ b/recipes/globes_co_il.recipe @@ -7,7 +7,7 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe): title = u'Globes' language = 'he' __author__ = 'marbs & barakplasma' - extra_css = 'img {max-width:100%;} body{direction: 
rtl;max-width:100%;}title{direction: rtl; } article_description{direction: rtl; }, a.article{direction: rtl;max-width:100%;} calibre_feed_description{direction: rtl; }' # noqa + extra_css = 'img {max-width:100%;} body{direction: rtl;max-width:100%;}title{direction: rtl; } article_description{direction: rtl; }, a.article{direction: rtl;max-width:100%;} calibre_feed_description{direction: rtl; }' # noqa: E501 simultaneous_downloads = 5 remove_javascript = True keep_only_tags = [ @@ -21,21 +21,21 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe): remove_attributes = ['width', 'style'] feeds = [ - (u"עידכוני RSS ", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3038"), - (u"כל הכתבות", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=2"), - (u"שוק ההון", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=585"), - (u"בארץ", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=9917"), - (u"גלובלי ושוקי עולם", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1225"), - (u"גלובסטק", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=594"), - (u"דין וחשבון", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=829"), - (u"דעות", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=845"), - (u"וידאו", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=2007"), - (u"ליידי גלובס", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3314"), - (u"מגזין G", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3312"), - (u"nadlan", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=607"), - (u"נתח שוק וצרכנות", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=821"), - (u"מטבעות דיגיטליים", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=9758"), - (u"קריירה", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iid=3266"), - (u"תיירות", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iid=9010"), - (u"רכב", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3220") + (u'עידכוני RSS ', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3038'), + (u'כל הכתבות', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=2'), + (u'שוק ההון', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=585'), + (u'בארץ', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=9917'), + (u'גלובלי ושוקי עולם', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1225'), + (u'גלובסטק', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=594'), + (u'דין וחשבון', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=829'), + (u'דעות', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=845'), + (u'וידאו', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=2007'), + (u'ליידי גלובס', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3314'), + (u'מגזין G', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3312'), + (u'nadlan', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=607'), + (u'נתח שוק וצרכנות', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=821'), + (u'מטבעות דיגיטליים', 
u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=9758'), + (u'קריירה', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iid=3266'), + (u'תיירות', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iid=9010'), + (u'רכב', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3220') ] diff --git a/recipes/go_comics.recipe b/recipes/go_comics.recipe index 57c23d87e1..fbd79f714a 100644 --- a/recipes/go_comics.recipe +++ b/recipes/go_comics.recipe @@ -61,7 +61,7 @@ class GoComics(BasicNewsRecipe): # (u"Andertoons",u"http://www.gocomics.com/andertoons"), # (u"Andy Capp",u"http://www.gocomics.com/andycapp"), # (u"Angry Little Girls",u"http://www.gocomics.com/angry-little-girls"), - (u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"), + (u'Animal Crackers', u'http://www.gocomics.com/animalcrackers'), # (u"Annie",u"http://www.gocomics.com/annie"), # (u"The Argyle Sweater",u"http://www.gocomics.com/theargylesweater"), # (u"Robert Ariail",u"http://www.gocomics.com/robert-ariail"), @@ -71,14 +71,14 @@ class GoComics(BasicNewsRecipe): # (u"At the Zoo",u"http://www.gocomics.com/at-the-zoo"), # (u"Aunty Acid",u"http://www.gocomics.com/aunty-acid"), # (u"The Awkward Yeti",u"http://www.gocomics.com/the-awkward-yeti"), - (u"B.C.",u"http://www.gocomics.com/bc"), + (u'B.C.',u'http://www.gocomics.com/bc'), # (u"Back to B.C.",u"http://www.gocomics.com/back-to-bc"), # (u"Back in the Day",u"http://www.gocomics.com/backintheday"), # (u"bacon",u"http://www.gocomics.com/bacon"), # (u"Bad Machinery",u"http://www.gocomics.com/bad-machinery"), # (u"Bad Reporter",u"http://www.gocomics.com/badreporter"), # (u"Badlands",u"http://www.gocomics.com/badlands"), - (u"Baldo",u"http://www.gocomics.com/baldo"), + (u'Baldo',u'http://www.gocomics.com/baldo'), # (u"Ballard Street",u"http://www.gocomics.com/ballardstreet"), # (u"Banana Triangle",u"http://www.gocomics.com/banana-triangle"), # (u"Barkeater Lake Pandolph",u"http://www.gocomics.com/barkeaterlake"), @@ -115,10 +115,10 @@ class GoComics(BasicNewsRecipe): # (u"Chip Bok",u"http://www.gocomics.com/chipbok"), # (u"Boomerangs",u"http://www.gocomics.com/boomerangs"), # (u"The Boondocks",u"http://www.gocomics.com/boondocks"), - (u"The Born Loser",u"http://www.gocomics.com/the-born-loser"), + (u'The Born Loser',u'http://www.gocomics.com/the-born-loser'), # (u"Matt Bors",u"http://www.gocomics.com/matt-bors"), # (u"Bottomliners",u"http://www.gocomics.com/bottomliners"), - (u"Bound and Gagged",u"http://www.gocomics.com/boundandgagged"), + (u'Bound and Gagged',u'http://www.gocomics.com/boundandgagged'), # (u"Brain Squirts",u"http://www.gocomics.com/brain-squirts"), # (u"Break of Day",u"http://www.gocomics.com/break-of-day"), # (u"Breaking Cat News",u"http://www.gocomics.com/breaking-cat-news"), @@ -126,12 +126,12 @@ class GoComics(BasicNewsRecipe): # (u"Brevity",u"http://www.gocomics.com/brevitypanel"), # (u"Brewster Rockit",u"http://www.gocomics.com/brewsterrockit"), # (u"Chris Britt",u"http://www.gocomics.com/chrisbritt"), - (u"Broom Hilda",u"http://www.gocomics.com/broomhilda"), + (u'Broom Hilda',u'http://www.gocomics.com/broomhilda'), # (u"The Buckets",u"http://www.gocomics.com/thebuckets"), # (u"Bully",u"http://www.gocomics.com/bully"), # (u"Buni",u"http://www.gocomics.com/buni"), # (u"Bushy Tales",u"http://www.gocomics.com/bushy-tales"), - (u"Calvin and Hobbes",u"http://www.gocomics.com/calvinandhobbes"), + (u'Calvin and Hobbes',u'http://www.gocomics.com/calvinandhobbes'), # 
(u"Candorville",u"http://www.gocomics.com/candorville"), # (u"Stuart Carlson",u"http://www.gocomics.com/stuartcarlson"), # (u"Ken Catalino",u"http://www.gocomics.com/kencatalino"), @@ -202,7 +202,7 @@ class GoComics(BasicNewsRecipe): # (u"Flo and Friends",u"http://www.gocomics.com/floandfriends"), # (u"The Flying McCoys",u"http://www.gocomics.com/theflyingmccoys"), # (u"Foolish Mortals",u"http://www.gocomics.com/foolish-mortals"), - (u"For Better or For Worse",u"http://www.gocomics.com/forbetterorforworse"), + (u'For Better or For Worse',u'http://www.gocomics.com/forbetterorforworse'), # (u"For Heaven's Sake",u"http://www.gocomics.com/forheavenssake"), # (u"Fort Knox",u"http://www.gocomics.com/fortknox"), # (u"Four Eyes",u"http://www.gocomics.com/four-eyes"), @@ -210,7 +210,7 @@ class GoComics(BasicNewsRecipe): # (u"FoxTrot",u"http://www.gocomics.com/foxtrot"), # (u"FoxTrot Classics",u"http://www.gocomics.com/foxtrotclassics"), # (u"Francis",u"http://www.gocomics.com/francis"), - (u"Frank and Ernest",u"http://www.gocomics.com/frank-and-ernest"), + (u'Frank and Ernest',u'http://www.gocomics.com/frank-and-ernest'), # (u"Frankie Comics",u"http://www.gocomics.com/frankie-comics"), # (u"Frazz",u"http://www.gocomics.com/frazz"), # (u"Fred Basset",u"http://www.gocomics.com/fredbasset"), @@ -219,7 +219,7 @@ class GoComics(BasicNewsRecipe): # (u"Frog Applause",u"http://www.gocomics.com/frogapplause"), # (u"From the Mo Willems Sketchbook",u"http://www.gocomics.com/from-the-mo-willems-sketchbook"), # (u"The Fusco Brothers",u"http://www.gocomics.com/thefuscobrothers"), - (u"Garfield",u"http://www.gocomics.com/garfield"), + (u'Garfield',u'http://www.gocomics.com/garfield'), # (u"Garfield Classics",u"http://www.gocomics.com/garfield-classics"), # (u"Garfield Minus Garfield",u"http://www.gocomics.com/garfieldminusgarfield"), # (u"Gasoline Alley",u"http://www.gocomics.com/gasolinealley"), @@ -227,7 +227,7 @@ class GoComics(BasicNewsRecipe): # (u"Gentle Creatures",u"http://www.gocomics.com/gentle-creatures"), # (u"The Gentleman's Armchair",u"http://www.gocomics.com/the-gentlemans-armchair"), # (u"Get a Life",u"http://www.gocomics.com/getalife"), - (u"Get Fuzzy",u"http://www.gocomics.com/getfuzzy"), + (u'Get Fuzzy',u'http://www.gocomics.com/getfuzzy'), # (u"Gil",u"http://www.gocomics.com/gil"), # (u"Gil Thorp",u"http://www.gocomics.com/gilthorp"), # (u"Ginger Meggs",u"http://www.gocomics.com/gingermeggs"), @@ -248,7 +248,7 @@ class GoComics(BasicNewsRecipe): # (u"Phil Hands",u"http://www.gocomics.com/phil-hands"), # (u"Health Capsules",u"http://www.gocomics.com/healthcapsules"), # (u"Heart of the City",u"http://www.gocomics.com/heartofthecity"), - (u"Heathcliff",u"http://www.gocomics.com/heathcliff"), + (u'Heathcliff',u'http://www.gocomics.com/heathcliff'), # (u"Joe Heller",u"http://www.gocomics.com/joe-heller"), # (u"Rebecca Hendin",u"http://www.gocomics.com/rebecca-hendin"), # (u"Herb and Jamaal",u"http://www.gocomics.com/herbandjamaal"), @@ -313,7 +313,7 @@ class GoComics(BasicNewsRecipe): # (u"Lost Side of Suburbia",u"http://www.gocomics.com/lostsideofsuburbia"), # (u"Lost Sheep",u"http://www.gocomics.com/lostsheep"), # (u"Chan Lowe",u"http://www.gocomics.com/chanlowe"), - (u"Luann",u"http://www.gocomics.com/luann"), + (u'Luann',u'http://www.gocomics.com/luann'), # (u"Luann Againn",u"http://www.gocomics.com/luann-againn"), # (u"Mike Luckovich",u"http://www.gocomics.com/mikeluckovich"), # (u"Lucky Cow",u"http://www.gocomics.com/luckycow"), @@ -326,7 +326,7 @@ class GoComics(BasicNewsRecipe): # (u"Making 
It",u"http://www.gocomics.com/making-it"), # (u"Maria's Day",u"http://www.gocomics.com/marias-day"), # (u"Gary Markstein",u"http://www.gocomics.com/garymarkstein"), - (u"Marmaduke",u"http://www.gocomics.com/marmaduke"), + (u'Marmaduke',u'http://www.gocomics.com/marmaduke'), # (u"The Martian Confederacy",u"http://www.gocomics.com/the-martian-confederacy"), # (u"MazeToons Puzzle",u"http://www.gocomics.com/mazetoons-puzzle"), # (u"Glenn McCoy",u"http://www.gocomics.com/glennmccoy"), @@ -335,13 +335,13 @@ class GoComics(BasicNewsRecipe): # (u"Medium Large",u"http://www.gocomics.com/medium-large"), # (u"Meg Classics",u"http://www.gocomics.com/meg-classics"), # (u"Microcosm",u"http://www.gocomics.com/microcosm"), - (u"The Middletons",u"http://www.gocomics.com/themiddletons"), + (u'The Middletons',u'http://www.gocomics.com/themiddletons'), # (u"Mike du Jour",u"http://www.gocomics.com/mike-du-jour"), # (u"Minimum Security",u"http://www.gocomics.com/minimumsecurity"), # (u"Moderately Confused",u"http://www.gocomics.com/moderately-confused"), # (u"Molebashed",u"http://www.gocomics.com/molebashed"), # (u"Molly and the Bear",u"http://www.gocomics.com/mollyandthebear"), - (u"Momma",u"http://www.gocomics.com/momma"), + (u'Momma',u'http://www.gocomics.com/momma'), # (u"Mom's Cancer",u"http://www.gocomics.com/moms-cancer"), # (u"Monty",u"http://www.gocomics.com/monty"), # (u"Jim Morin",u"http://www.gocomics.com/jimmorin"), @@ -359,7 +359,7 @@ class GoComics(BasicNewsRecipe): # (u"New Adventures of Queen Victoria",u"http://www.gocomics.com/thenewadventuresofqueenvictoria"), # (u"Next Door Neighbors",u"http://www.gocomics.com/next-door-neighbors"), # (u"Nick and Zuzu",u"http://www.gocomics.com/nick-and-zuzu), - (u"Non Sequitur",u"http://www.gocomics.com/nonsequitur"), + (u'Non Sequitur',u'http://www.gocomics.com/nonsequitur'), # (u"The Norm 4.0",u"http://www.gocomics.com/the-norm-4-0"), # (u"The Norm Classics",u"http://www.gocomics.com/thenorm"), # (u"Not Invented Here",u"http://www.gocomics.com/not-invented-here"), @@ -383,10 +383,10 @@ class GoComics(BasicNewsRecipe): # (u"Ozy and Millie",u"http://www.gocomics.com/ozy-and-millie"), # (u"Henry Payne",u"http://www.gocomics.com/henrypayne"), # (u"PC and Pixel",u"http://www.gocomics.com/pcandpixel"), - (u"Peanuts",u"http://www.gocomics.com/peanuts"), + (u'Peanuts',u'http://www.gocomics.com/peanuts'), # (u"Peanuts Begins",u"http://www.gocomics.com/peanuts-begins"), # (u"Peanuts Holiday Countdown",u"http://www.gocomics.com/peanuts-holiday-countdown"), - (u"Pearls Before Swine",u"http://www.gocomics.com/pearlsbeforeswine"), + (u'Pearls Before Swine',u'http://www.gocomics.com/pearlsbeforeswine'), # (u"Perry Bible Fellowship",u"http://www.gocomics.com/perry-bible-fellowship"), # (u"Joel Pett",u"http://www.gocomics.com/joelpett"), # (u"Phoebe and Her Unicorn",u"http://www.gocomics.com/phoebe-and-her-unicorn"), @@ -398,7 +398,7 @@ class GoComics(BasicNewsRecipe): # (u"Pinkerton",u"http://www.gocomics.com/pinkerton"), # (u"Please Listen to Me",u"http://www.gocomics.com/please-listen-to-me"), # (u"Pluggers",u"http://www.gocomics.com/pluggers"), - (u"Pooch Cafe",u"http://www.gocomics.com/poochcafe"), + (u'Pooch Cafe',u'http://www.gocomics.com/poochcafe'), # (u"Poorcraft",u"http://www.gocomics.com/poorcraft"), # (u"Poorly Drawn Lines",u"http://www.gocomics.com/poorly-drawn-lines"), # (u"Pop Culture Shock Therapy",u"http://www.gocomics.com/pop-culture-shock-therapy"), @@ -427,7 +427,7 @@ class GoComics(BasicNewsRecipe): # (u"Ripley's Believe It or 
Not",u"http://www.gocomics.com/ripleysbelieveitornot"), # (u"Robbie and Bobby",u"http://www.gocomics.com/robbie-and-bobby"), # (u"Rob Rogers",u"http://www.gocomics.com/robrogers"), - (u"Rose is Rose",u"http://www.gocomics.com/roseisrose"), + (u'Rose is Rose',u'http://www.gocomics.com/roseisrose'), # (u"Rubes",u"http://www.gocomics.com/rubes"), # (u"Rudy Park",u"http://www.gocomics.com/rudypark"), # (u"Sarah's Scribbles",u"http://www.gocomics.com/sarahs-scribbles"), @@ -438,7 +438,7 @@ class GoComics(BasicNewsRecipe): # (u"Sheldon",u"http://www.gocomics.com/sheldon"), # (u"Drew Sheneman",u"http://www.gocomics.com/drewsheneman"), # (u"Shirley and Son Classics",u"http://www.gocomics.com/shirley-and-son-classics"), - (u"Shoe",u"http://www.gocomics.com/shoe"), + (u'Shoe',u'http://www.gocomics.com/shoe'), # (u"Shoecabbage",u"http://www.gocomics.com/shoecabbage"), # (u"Shortcuts",u"http://www.gocomics.com/shortcuts"), # (u"Shutterbug Follies",u"http://www.gocomics.com/shutterbug-follies"), @@ -524,7 +524,7 @@ class GoComics(BasicNewsRecipe): # (u"Winston",u"http://www.gocomics.com/winston"), # (u"Wit of the World",u"http://www.gocomics.com/witoftheworld"), # (u"CartoonArts International",u"http://www.gocomics.com/witoftheworld"), - (u"Wizard of Id",u"http://www.gocomics.com/wizardofid"), + (u'Wizard of Id',u'http://www.gocomics.com/wizardofid'), # (u"Wizard of Id Classics",u"http://www.gocomics.com/wizard-of-id-classics"), # (u"Wondermark",u"http://www.gocomics.com/wondermark"), # (u"Working Daze",u"http://www.gocomics.com/working-daze"), diff --git a/recipes/google_news.recipe b/recipes/google_news.recipe index 7eca31974a..761dc07050 100644 --- a/recipes/google_news.recipe +++ b/recipes/google_news.recipe @@ -50,7 +50,7 @@ class google_news_de(BasicNewsRecipe): url = e.hdrs.get('location') soup = self.index_to_soup(url) link = soup.find('a', href=True) - skip_sections =[ # add sections you want to skip + skip_sections =[ # add sections you want to skip '/video/', '/videos/', '/media/', 'podcast-' ] if any(x in link['href'] for x in skip_sections): @@ -68,7 +68,7 @@ class google_news_de(BasicNewsRecipe): # feel free to add, wipe out what you need ---- can be edit by user # def get_feeds(self): - url = "https://geolocation-db.com/json" + url = 'https://geolocation-db.com/json' data = self.index_to_soup(url, raw=True) data = json.loads(data) country_code = str(data['country_code']).lower() # for me this is de diff --git a/recipes/gorky.recipe b/recipes/gorky.recipe index e5e671449a..8bc7988cc4 100644 --- a/recipes/gorky.recipe +++ b/recipes/gorky.recipe @@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class Gorky(BasicNewsRecipe): title = '\u0413\u043E\u0440\u044C\u043A\u0438\u0439' __author__ = 'bugmen00t' - description = '\u041D\u0435\u043A\u043E\u043C\u043C\u0435\u0440\u0447\u0435\u0441\u043A\u0438\u0439 \u043F\u0440\u043E\u0435\u043A\u0442 \u043E \u043A\u043D\u0438\u0433\u0430\u0445 \u0438 \u0447\u0442\u0435\u043D\u0438\u0438.' 
# noqa - publisher = '\u0410\u041D\u041E "\u0426\u0435\u043D\u0442\u0440 \u043F\u043E \u0441\u043E\u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044E \u0440\u0430\u0437\u0432\u0438\u0442\u0438\u044F \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u044B \u0447\u0442\u0435\u043D\u0438\u044F \u0438 \u043A\u043D\u0438\u0433\u043E\u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u0413\u043E\u0440\u044C\u043A\u0438\u0439 \u041C\u0435\u0434\u0438\u0430\u00BB"' # noqa + description = '\u041D\u0435\u043A\u043E\u043C\u043C\u0435\u0440\u0447\u0435\u0441\u043A\u0438\u0439 \u043F\u0440\u043E\u0435\u043A\u0442 \u043E \u043A\u043D\u0438\u0433\u0430\u0445 \u0438 \u0447\u0442\u0435\u043D\u0438\u0438.' # noqa: E501 + publisher = '\u0410\u041D\u041E "\u0426\u0435\u043D\u0442\u0440 \u043F\u043E \u0441\u043E\u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044E \u0440\u0430\u0437\u0432\u0438\u0442\u0438\u044F \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u044B \u0447\u0442\u0435\u043D\u0438\u044F \u0438 \u043A\u043D\u0438\u0433\u043E\u0438\u0437\u0434\u0430\u043D\u0438\u044F \u00AB\u0413\u043E\u0440\u044C\u043A\u0438\u0439 \u041C\u0435\u0434\u0438\u0430\u00BB"' # noqa: E501 category = 'blog' cover_url = u'https://gorky.media/wp-content/uploads/2016/09/gorky.png' language = 'ru' @@ -22,7 +22,7 @@ class Gorky(BasicNewsRecipe): remove_tags_after = dict(name='footer') - remove_tags = [ + remove_tags = [ dict(name='footer'), dict(name='nav', attrs={'class': 'navbar'}), dict(name='div', attrs={'class': 'hide'}), diff --git a/recipes/gosc_full.recipe b/recipes/gosc_full.recipe index 2f1752f5be..8e373f95f4 100644 --- a/recipes/gosc_full.recipe +++ b/recipes/gosc_full.recipe @@ -2,8 +2,8 @@ from __future__ import unicode_literals __license__ = 'GPL v3' -__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \ - 2013-2018, Tomasz Długosz, tomek3d@gmail.com' +__copyright__ = ('2011, Piotr Kontek, piotr.kontek@gmail.com ' + '2013-2018, Tomasz Długosz, tomek3d@gmail.com') import re import time @@ -32,7 +32,7 @@ class GN(BasicNewsRecipe): page = doc.xpath( '//div[@class="search-result release-result"]/div[1]/div[1]/a/@href') - if time.strftime("%w") in ['3', '4']: + if time.strftime('%w') in ['3', '4']: return page[5] else: return page[4] @@ -54,7 +54,7 @@ class GN(BasicNewsRecipe): }] feeds.append((u'Na dobry początek', articles)) # columns: - for addr in soup.findAll('a', attrs={'href': re.compile('kategoria')}): + for addr in soup.findAll('a', attrs={'href': re.compile(r'kategoria')}): if not addr.span: main_block = self.index_to_soup( 'http://www.gosc.pl' + addr['href']) diff --git a/recipes/gosc_niedzielny.recipe b/recipes/gosc_niedzielny.recipe index 773b25aefa..fc26fb911f 100644 --- a/recipes/gosc_niedzielny.recipe +++ b/recipes/gosc_niedzielny.recipe @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- __license__ = 'GPL v3' -__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \ - 2013-2018, Tomasz Długosz, tomek3d@gmail.com' +__copyright__ = ('2011, Piotr Kontek, piotr.kontek@gmail.com ' + '2013-2018, Tomasz Długosz, tomek3d@gmail.com') import re @@ -50,7 +50,7 @@ class GN(BasicNewsRecipe): }] feeds.append((u'Na dobry początek', articles)) # columns: - for addr in soup.findAll('a', attrs={'href': re.compile('kategoria')}): + for addr in soup.findAll('a', attrs={'href': re.compile(r'kategoria')}): if not addr.span: main_block = self.index_to_soup( 'http://www.gosc.pl' + addr['href']) diff --git a/recipes/gram_pl.recipe b/recipes/gram_pl.recipe index ca5d843478..23651191fc 100644 --- a/recipes/gram_pl.recipe +++ 
b/recipes/gram_pl.recipe @@ -44,7 +44,7 @@ class Gram_pl(BasicNewsRecipe): tag.p.insert(len(tag.p.contents) - 2, BeautifulSoup('

Ocena: {0}

'.format(rate)).h2) for a in soup.findAll('a', href=True): - if 'http://' not in a['href'] and 'https://' not in a['href']: # noqa + if 'http://' not in a['href'] and 'https://' not in a['href']: a['href'] = self.index + a['href'] tag = soup.find(name='span', attrs={'class': 'platforma'}) if tag: diff --git a/recipes/grani.recipe b/recipes/grani.recipe index 8c0c0bec11..5509434320 100644 --- a/recipes/grani.recipe +++ b/recipes/grani.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Grani(BasicNewsRecipe): title = '\u0413\u0440\u0430\u043D\u0438.\u0420\u0443' __author__ = 'bugmen00t' - description = '\u0415\u0436\u0435\u0434\u043D\u0435\u0432\u043D\u0430\u044F \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0433\u0430\u0437\u0435\u0442\u0430. \u041E\u0431\u0437\u043E\u0440\u044B \u0438 \u0430\u043D\u0430\u043B\u0438\u0437 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u0445 \u0438 \u043C\u0438\u0440\u043E\u0432\u044B\u0445 \u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0445 \u0441\u043E\u0431\u044B\u0442\u0438\u0439, \u0441\u0442\u0430\u0442\u044C\u0438 \u0438 \u0432\u0438\u0434\u0435\u043E \u043E \u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0445 \u0437\u0430\u043A\u043B\u044E\u0447\u0451\u043D\u043D\u044B\u0445.' # noqa + description = '\u0415\u0436\u0435\u0434\u043D\u0435\u0432\u043D\u0430\u044F \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0433\u0430\u0437\u0435\u0442\u0430. \u041E\u0431\u0437\u043E\u0440\u044B \u0438 \u0430\u043D\u0430\u043B\u0438\u0437 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u0445 \u0438 \u043C\u0438\u0440\u043E\u0432\u044B\u0445 \u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0445 \u0441\u043E\u0431\u044B\u0442\u0438\u0439, \u0441\u0442\u0430\u0442\u044C\u0438 \u0438 \u0432\u0438\u0434\u0435\u043E \u043E \u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0445 \u0437\u0430\u043A\u043B\u044E\u0447\u0451\u043D\u043D\u044B\u0445.' # noqa: E501 publisher = '\u041E\u041E\u041E "\u0424\u043B\u0430\u0432\u0443\u0441"' category = 'newspaper' cover_url = u'https://graniru.org/files/96172.png' @@ -21,7 +21,7 @@ class Grani(BasicNewsRecipe): remove_tags_before = dict(name='h3') - remove_tags_after = dict(name='div', attrs={'class': 'main-text clearfix'}), + remove_tags_after = dict(name='div', attrs={'class': 'main-text clearfix'}) # Original feeds # feeds = [ diff --git a/recipes/granta.recipe b/recipes/granta.recipe index 910e0a2cf5..3b84de65a6 100644 --- a/recipes/granta.recipe +++ b/recipes/granta.recipe @@ -47,18 +47,18 @@ def multiply_with_unknown_component(first_comp, second_comp, result): def solve_captcha(captcha): - # # Convert from a word problem into a numeric problem + # Convert from a word problem into a numeric problem numeric_problem = '' for part in captcha.split(' '): numeric_problem = numeric_problem + str(text2num(part)) # Parse into parts pattern = re.compile( - u'(?P[0-9]+)?' - u'\\s*(?P[+×−])\\s*' - u'(?P[0-9]+)' - u'\\s*(=)\\s*' - u'(?P[0-9]+)?', re.UNICODE) + r'(?P[0-9]+)?' 
+ u'\\s*(?P[+×−])\\s*' # noqa: RUF039 + r'(?P[0-9]+)' + r'\s*(=)\s*' + r'(?P[0-9]+)?', re.UNICODE) calculationParts = re.search(pattern, numeric_problem) if calculationParts is None: @@ -138,14 +138,14 @@ Magnitude = { def text2num(s): - a = re.split(r"[\s-]+", s) + a = re.split(r'[\s-]+', s) n = 0 g = 0 for w in a: x = Small.get(w, None) if x is not None: g += x - elif w == "hundred" and g != 0: + elif w == 'hundred' and g != 0: g *= 100 else: x = Magnitude.get(w, None) @@ -195,7 +195,7 @@ class Granta(BasicNewsRecipe): if captcha_question is not None: captcha = str(solve_captcha(captcha_question)) - br.select_form(method="post", action="https://granta.com/") + br.select_form(method='post', action='https://granta.com/') br['username'] = self.username br['password'] = self.password br['capcha'] = captcha diff --git a/recipes/grantland.recipe b/recipes/grantland.recipe index c0d400e84f..8500ea5313 100644 --- a/recipes/grantland.recipe +++ b/recipes/grantland.recipe @@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class GrantLand(BasicNewsRecipe): - title = u"Grantland" + title = u'Grantland' description = 'Writings on Sports & Pop Culture' language = 'en' __author__ = 'barty on mobileread.com forum' @@ -47,11 +47,11 @@ class GrantLand(BasicNewsRecipe): for category in self.CATEGORIES: - (cat_name, tag, max_articles) = category + cat_name, tag, max_articles = category self.log('Reading category:', cat_name) articles = [] - page = "%s/%s" % (self.INDEX, tag) + page = '%s/%s' % (self.INDEX, tag) soup = self.index_to_soup(page) main = soup.find('div', id='col-main') diff --git a/recipes/greensboro_news_and_record.recipe b/recipes/greensboro_news_and_record.recipe index 003d0f3c74..1e04fb8864 100644 --- a/recipes/greensboro_news_and_record.recipe +++ b/recipes/greensboro_news_and_record.recipe @@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class NewsandRecord(BasicNewsRecipe): title = u'Greensboro News & Record' - description = "News from Greensboro, North Carolina" + description = 'News from Greensboro, North Carolina' __author__ = 'Walt Anthony' publisher = 'News & Record and Landmark Media Enterprises, LLC' category = 'news, USA' @@ -27,11 +27,11 @@ class NewsandRecord(BasicNewsRecipe): feeds = [ ('News', 'http://www.greensboro.com/search/?q=&t=article&l=10&d=&d1=&d2=&s=start_time&sd=desc&c[]=news,news/*&f=rss'), - ('Greensboro News', 'http://www.greensboro.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=news/local,news/crime,news/goverment,news/schools,news/rockingham_county,news/local,news/crime,news/goverment,news/schools,news/rockingham_county/*&f=rss'), # noqa + ('Greensboro News', 'http://www.greensboro.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=news/local,news/crime,news/goverment,news/schools,news/rockingham_county,news/local,news/crime,news/goverment,news/schools,news/rockingham_county/*&f=rss'), # noqa: E501 ('Business', 'http://www.greensboro.com/search/?q=&t=article&l=10&d=&d1=&d2=&s=start_time&sd=desc&c[]=business,business/*&f=rss'), - ('Local Business', 'http://www.greensboro.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=business/local_business,business/local_business/*&f=rss'), # noqa + ('Local Business', 'http://www.greensboro.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=business/local_business,business/local_business/*&f=rss'), # noqa: E501 ('Sports', 
'http://www.greensboro.com/search/?q=&t=article&l=10&d=&d1=&d2=&s=start_time&sd=desc&c[]=sports,sports/*&f=rss'), - ('College Sports', 'http://www.greensboro.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=sports/college,sports/college/*&f=rss'), # noqa - ('Sports Extra', 'http://www.greensboro.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=blogs/sports_extra,blogs/sports_extra/*&f=rss'), # noqa + ('College Sports', 'http://www.greensboro.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=sports/college,sports/college/*&f=rss'), # noqa: E501 + ('Sports Extra', 'http://www.greensboro.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=blogs/sports_extra,blogs/sports_extra/*&f=rss'), # noqa: E501 ('Life', 'http://www.greensboro.com/search/?q=&t=article&l=10&d=&d1=&d2=&s=start_time&sd=desc&c[]=life,life/*&f=rss'), ] diff --git a/recipes/gry_online_pl.recipe b/recipes/gry_online_pl.recipe index d769443d29..d9a8771873 100644 --- a/recipes/gry_online_pl.recipe +++ b/recipes/gry_online_pl.recipe @@ -21,7 +21,7 @@ class GryOnlinePl(BasicNewsRecipe): keep_only_tags = [dict(name='div', attrs={'class': [ 'gc660', 'gc660 S013', 'news_endpage_tit', 'news_container', 'news']})] remove_tags = [ - dict({'class': ['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2', 'social-for-old-news', 'social-for-old-rec']})] # noqa + {'class': ['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2', 'social-for-old-news', 'social-for-old-rec']}] # noqa: E501 feeds = [ (u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')] @@ -33,8 +33,7 @@ class GryOnlinePl(BasicNewsRecipe): url_part = soup.find('link', attrs={'rel': 'canonical'})['href'] url_part = url_part[25:].rpartition('?')[0] for nexturl in nexturls[1:-1]: - finalurl = 'http://www.gry-online.pl/' + \ - url_part + nexturl['href'] + finalurl = 'http://www.gry-online.pl/' + url_part + nexturl['href'] for i in range(10): try: soup2 = self.index_to_soup(finalurl) @@ -69,8 +68,7 @@ class GryOnlinePl(BasicNewsRecipe): break else: nexturl = tag.a - finalurl = 'http://www.gry-online.pl/' + \ - url_part + nexturl['href'] + finalurl = 'http://www.gry-online.pl/' + url_part + nexturl['href'] for i in range(10): try: soup2 = self.index_to_soup(finalurl) diff --git a/recipes/gsp.recipe b/recipes/gsp.recipe index 6a2e611ab4..af9e2f5299 100644 --- a/recipes/gsp.recipe +++ b/recipes/gsp.recipe @@ -29,7 +29,7 @@ class GSP(BasicNewsRecipe): 'comments': description, 'tags': category, 'language': language, 'publisher': publisher } - keep_only_tags = [dict(name='h1', attrs={'class': 'serif title_2'}), dict(name='div', attrs={'id': 'only_text'}), dict(name='span', attrs={'class': 'block poza_principala'}) # noqa + keep_only_tags = [dict(name='h1', attrs={'class': 'serif title_2'}), dict(name='div', attrs={'id': 'only_text'}), dict(name='span', attrs={'class': 'block poza_principala'}) # noqa: E501 ] feeds = [(u'\u0218tiri', u'http://www.gsp.ro/rss.xml')] diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe index cf6df01729..9e05967e4a 100644 --- a/recipes/guardian.recipe +++ b/recipes/guardian.recipe @@ -22,10 +22,10 @@ class Guardian(BasicNewsRecipe): title = u'The Guardian and The Observer' is_observer = False - base_url = 
"https://www.theguardian.com/uk" + base_url = 'https://www.theguardian.com/uk' if date.today().weekday() == 6: is_observer = True - base_url = "https://www.theguardian.com/observer" + base_url = 'https://www.theguardian.com/observer' __author__ = 'Kovid Goyal' language = 'en_GB' @@ -64,7 +64,7 @@ class Guardian(BasicNewsRecipe): classes('content__article-body js-bottom-marker article-body-commercial-selector'), ] - extra_css = """ + extra_css = ''' img { max-width: 100% !important; max-height: 100% !important; @@ -78,7 +78,7 @@ class Guardian(BasicNewsRecipe): font-size: 0.5em; color: #6B6B6B; } - """ + ''' def get_browser(self, *a, **kw): # This site returns images in JPEG-XR format if the user agent is IE diff --git a/recipes/gva_be.recipe b/recipes/gva_be.recipe index e2f304f26e..6ba7792ffc 100644 --- a/recipes/gva_be.recipe +++ b/recipes/gva_be.recipe @@ -50,10 +50,10 @@ class GazetvanAntwerpen(BasicNewsRecipe): remove_tags = [ dict(name=['embed', 'object']), dict(name='div', attrs={'class': ['note NotePortrait', 'note']}), - dict(name='ul', attrs={'class': re.compile('article__share')}), + dict(name='ul', attrs={'class': re.compile(r'article__share')}), dict(name='div', attrs={'class': 'slideshow__controls'}), dict(name='a', attrs={'role': 'button'}), - dict(name='figure', attrs={'class': re.compile('video')}) + dict(name='figure', attrs={'class': re.compile(r'video')}) ] remove_attributes = ['width', 'height'] diff --git a/recipes/haaretz_en.recipe b/recipes/haaretz_en.recipe index 525194c5d4..951687c1f7 100644 --- a/recipes/haaretz_en.recipe +++ b/recipes/haaretz_en.recipe @@ -33,13 +33,13 @@ class Haaretz_en(BasicNewsRecipe): PREFIX = 'https://www.haaretz.com' LOGIN = 'https://services.haaretz.com/ms-sso/loginUrlEncoded' LOGOUT = 'https://services.haaretz.com/ms-sso/logout' - extra_css = """ + extra_css = ''' body{font-family: Merriweather, "Helvetica Neue", Helvetica, Arial, sans-serif } div.mx time{display: none} div.my time{display: none} div.mq time{display: none} div.mr time{display: none} - """ + ''' conversion_options = { 'comment': description, 'publisher': publisher, 'language': language diff --git a/recipes/habr_ru.recipe b/recipes/habr_ru.recipe index 3c4b7ce4e4..b3bebccfa3 100644 --- a/recipes/habr_ru.recipe +++ b/recipes/habr_ru.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Habr(BasicNewsRecipe): title = '\u0425\u0430\u0431\u0440' __author__ = 'bugmen00t' - description = '\u041D\u0430 \u200B\u0425\u0430\u0431\u0440\u0435 \u200B\u0434\u0443\u043C\u0430\u044E\u0449\u0438\u0435 \u200B\u043B\u044E\u0434\u0438 \u200B\u0434\u0435\u043B\u044F\u0442\u0441\u044F \u200B\u0443\u043D\u0438\u043A\u0430\u043B\u044C\u043D\u044B\u043C \u200B\u200B\u043E\u043F\u044B\u0442\u043E\u043C. 
\u200B\u200B\u0417\u0434\u0435\u0441\u044C \u0431\u0443\u0434\u0435\u0442 \u200B\u200B\u043E\u0434\u0438\u043D\u0430\u043A\u043E\u0432\u043E \u200B\u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u043E \u200B\u043F\u0440\u043E\u0433\u0440\u0430\u043C\u043C\u0438\u0441\u0442\u0430\u043C \u200B\u0438 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0430\u043C, \u200B\u200B\u0430\u0434\u043C\u0438\u043D\u0430\u043C \u200B\u0438 \u0440\u0435\u043A\u043B\u0430\u043C\u0449\u0438\u043A\u0430\u043C, \u200B\u0430\u043D\u0430\u043B\u0438\u0442\u0438\u043A\u0430\u043C \u200B\u0438 \u0434\u0438\u0437\u0430\u0439\u043D\u0435\u0440\u0430\u043C, \u200B\u043C\u0435\u043D\u0435\u0434\u0436\u0435\u0440\u0430\u043C \u200B\u0432\u044B\u0441\u0448\u0435\u0433\u043E \u200B\u0438 \u0441\u0440\u0435\u0434\u043D\u0435\u0433\u043E \u200B\u0437\u0432\u0435\u043D\u0430, \u200B\u0432\u043B\u0430\u0434\u0435\u043B\u044C\u0446\u0430\u043C \u200B\u043A\u0440\u0443\u043F\u043D\u044B\u0445 \u200B\u043A\u043E\u043C\u043F\u0430\u043D\u0438\u0439 \u200B\u0438 \u043D\u0435\u0431\u043E\u043B\u044C\u0448\u0438\u0445 \u200B\u0444\u0438\u0440\u043C, \u200B\u0430 \u0442\u0430\u043A\u0436\u0435 \u200B\u0432\u0441\u0435\u043C \u200B\u0442\u0435\u043C, \u200B\u0434\u043B\u044F \u043A\u043E\u0433\u043E \u200BIT \u2014 \u200B\u044D\u0442\u043E \u043D\u0435 \u043F\u0440\u043E\u0441\u0442\u043E \u200B\u0434\u0432\u0435 \u0431\u0443\u043A\u0432\u044B \u200B\u0430\u043B\u0444\u0430\u0432\u0438\u0442\u0430.' # noqa + description = '\u041D\u0430 \u200B\u0425\u0430\u0431\u0440\u0435 \u200B\u0434\u0443\u043C\u0430\u044E\u0449\u0438\u0435 \u200B\u043B\u044E\u0434\u0438 \u200B\u0434\u0435\u043B\u044F\u0442\u0441\u044F \u200B\u0443\u043D\u0438\u043A\u0430\u043B\u044C\u043D\u044B\u043C \u200B\u200B\u043E\u043F\u044B\u0442\u043E\u043C. \u200B\u200B\u0417\u0434\u0435\u0441\u044C \u0431\u0443\u0434\u0435\u0442 \u200B\u200B\u043E\u0434\u0438\u043D\u0430\u043A\u043E\u0432\u043E \u200B\u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u043E \u200B\u043F\u0440\u043E\u0433\u0440\u0430\u043C\u043C\u0438\u0441\u0442\u0430\u043C \u200B\u0438 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0430\u043C, \u200B\u200B\u0430\u0434\u043C\u0438\u043D\u0430\u043C \u200B\u0438 \u0440\u0435\u043A\u043B\u0430\u043C\u0449\u0438\u043A\u0430\u043C, \u200B\u0430\u043D\u0430\u043B\u0438\u0442\u0438\u043A\u0430\u043C \u200B\u0438 \u0434\u0438\u0437\u0430\u0439\u043D\u0435\u0440\u0430\u043C, \u200B\u043C\u0435\u043D\u0435\u0434\u0436\u0435\u0440\u0430\u043C \u200B\u0432\u044B\u0441\u0448\u0435\u0433\u043E \u200B\u0438 \u0441\u0440\u0435\u0434\u043D\u0435\u0433\u043E \u200B\u0437\u0432\u0435\u043D\u0430, \u200B\u0432\u043B\u0430\u0434\u0435\u043B\u044C\u0446\u0430\u043C \u200B\u043A\u0440\u0443\u043F\u043D\u044B\u0445 \u200B\u043A\u043E\u043C\u043F\u0430\u043D\u0438\u0439 \u200B\u0438 \u043D\u0435\u0431\u043E\u043B\u044C\u0448\u0438\u0445 \u200B\u0444\u0438\u0440\u043C, \u200B\u0430 \u0442\u0430\u043A\u0436\u0435 \u200B\u0432\u0441\u0435\u043C \u200B\u0442\u0435\u043C, \u200B\u0434\u043B\u044F \u043A\u043E\u0433\u043E \u200BIT \u2014 \u200B\u044D\u0442\u043E \u043D\u0435 \u043F\u0440\u043E\u0441\u0442\u043E \u200B\u0434\u0432\u0435 \u0431\u0443\u043A\u0432\u044B \u200B\u0430\u043B\u0444\u0430\u0432\u0438\u0442\u0430.' 
# noqa: E501 publisher = 'Habr Blockchain Publishing LTD' category = 'blog' cover_url = u'https://habr.com/img/habr_ru.png' diff --git a/recipes/hackernews.recipe b/recipes/hackernews.recipe index ef6e367d40..3d9656c158 100644 --- a/recipes/hackernews.recipe +++ b/recipes/hackernews.recipe @@ -71,14 +71,12 @@ class HNWithCommentsLink(BasicNewsRecipe): for td in main.findAll('td', 'default'): comhead = td.find('span', 'comhead') if comhead: - com_title = u'

' + \ - self.tag_to_string(comhead).replace( - ' | link', '') + u'

' + com_title = u'

' + self.tag_to_string(comhead).replace(' | link', '') + u'

' comhead.parent.extract() br = td.find('br') if br: br.extract() - reply = td.find('a', attrs={'href': re.compile('^reply?')}) + reply = td.find('a', attrs={'href': re.compile(r'^reply?')}) if reply: reply.parent.extract() td.name = 'div' @@ -86,8 +84,8 @@ class HNWithCommentsLink(BasicNewsRecipe): td['style'] = 'padding-left: ' + str(indent_width) + 'px' comments = comments + com_title + td.prettify() - body = u'

' + title + u'

' + readable_link + \ - u'
' + subtext + u'

' + title_content + u'
' + body = (u'

' + title + u'

' + readable_link + + u'
' + subtext + u'

' + title_content + u'
') body = body + comments return u'' + title + u'' + body + '' @@ -114,8 +112,7 @@ class HNWithCommentsLink(BasicNewsRecipe): else: content = self.get_readable_content(url) - # content = re.sub(r'\s*\s*$', '', content) + \ - # article.summary + '' + # content = re.sub(r'\s*\s*$', '', content) + article.summary + '' if not isinstance(content, bytes): content = content.encode('utf-8') @@ -136,7 +133,7 @@ class HNWithCommentsLink(BasicNewsRecipe): article.text_summary = self.prettyify_url(article.url) article.summary = article.text_summary -# def parse_index(self): -# feeds = [] -# feeds.append((u'Hacker News',[{'title': 'Testing', 'url': 'https://news.ycombinator.com/item?id=2935944'}])) -# return feeds + # def parse_index(self): + # feeds = [] + # feeds.append((u'Hacker News',[{'title': 'Testing', 'url': 'https://news.ycombinator.com/item?id=2935944'}])) + # return feeds diff --git a/recipes/hamilton_spectator.recipe b/recipes/hamilton_spectator.recipe index 92180b75ea..57aed48530 100644 --- a/recipes/hamilton_spectator.recipe +++ b/recipes/hamilton_spectator.recipe @@ -5,10 +5,12 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes Hamilton Spectator Calibre Recipe ''' + def absurl(url): if url.startswith('/'): return 'https://www.thespec.com' + url + class HamiltonSpectator(BasicNewsRecipe): title = u'Hamilton Spectator' max_articles_per_feed = 50 diff --git a/recipes/handelsblatt.recipe b/recipes/handelsblatt.recipe index f8285cb48a..3984579592 100644 --- a/recipes/handelsblatt.recipe +++ b/recipes/handelsblatt.recipe @@ -59,7 +59,7 @@ class Handelsblatt(BasicNewsRecipe): dict(name='aside', attrs={'class': ['vhb-article-element vhb-left', 'vhb-article-element vhb-left vhb-teasergallery', 'vhb-article-element vhb-left vhb-shorttexts']}), - dict(name='aside', attrs={'class': re.compile('vhb-club-events')}), + dict(name='aside', attrs={'class': re.compile(r'vhb-club-events')}), dict(name='article', attrs={'class': ['vhb-imagegallery vhb-teaser', 'vhb-teaser vhb-type-video']}), dict(name='small', attrs={'class': ['vhb-credit']}), @@ -70,14 +70,14 @@ class Handelsblatt(BasicNewsRecipe): 'opinary-widget-wrapper', 'vhb-article__content-element--shorttextgallery', 'vhb-hollow-area vhb-hollow-area--col-1']}), - dict(name='div', attrs={'class': re.compile('stepstone')}), - dict(name='div', attrs={'class': re.compile('vhb-imagegallery')}), + dict(name='div', attrs={'class': re.compile(r'stepstone')}), + dict(name='div', attrs={'class': re.compile(r'vhb-imagegallery')}), dict(name='div', attrs={'id': ['highcharts_infografik']}), - dict(name='div', attrs={'id': re.compile('dax-sentiment')}), - dict(name=['div', 'section'], attrs={'class': re.compile('slider')}), + dict(name='div', attrs={'id': re.compile(r'dax-sentiment')}), + dict(name=['div', 'section'], attrs={'class': re.compile(r'slider')}), dict(name='a', attrs={'class': ['twitter-follow-button']}), dict(name='img', attrs={'class': ['highlight-icon', 'lb-author__avatar', 'pin-icon']}), - dict(name='img', attrs={'alt': re.compile('Handelsblatt Morning Briefing')}), + dict(name='img', attrs={'alt': re.compile(r'Handelsblatt Morning Briefing')}), dict(name=['blockquote', 'button', 'link']) ] @@ -92,17 +92,17 @@ class Handelsblatt(BasicNewsRecipe): (re.compile(r'( data-src=")([^"]*")', re.IGNORECASE), lambda match: ' src="' + match.group(2)) ] - extra_css = 'h2 {font-size: 1em; text-align: left} \ - h3 {font-size: 1em; text-align: left} \ - h4 {font-size: 1em; text-align: left; margin-bottom: 0em} \ - em {font-style: normal; font-weight: 
bold} \ - .vhb-subline {font-weight: normal; text-transform: uppercase} \ - .vhb-headline {font-size: 1.6em} \ - .vhb-teaser-head {margin-top: 1em; margin-bottom: 1em} \ - .vhb-hollow-area--innercontent {font-size: 0.6em} \ - .hcf-location-mark {font-weight: bold} \ - .lb-post-header {margin-top: 1em} \ - .panel-body p {margin-top: 0em}' + extra_css = '''h2 {font-size: 1em; text-align: left} + h3 {font-size: 1em; text-align: left} + h4 {font-size: 1em; text-align: left; margin-bottom: 0em} + em {font-style: normal; font-weight: bold} + .vhb-subline {font-weight: normal; text-transform: uppercase} + .vhb-headline {font-size: 1.6em} + .vhb-teaser-head {margin-top: 1em; margin-bottom: 1em} + .vhb-hollow-area--innercontent {font-size: 0.6em} + .hcf-location-mark {font-weight: bold} + .lb-post-header {margin-top: 1em} + .panel-body p {margin-top: 0em}''' def get_browser(self): br = BasicNewsRecipe.get_browser(self) @@ -138,7 +138,7 @@ class Handelsblatt(BasicNewsRecipe): def postprocess_html(self, soup, first_fetch): # convert lists of author(s) and date(s) into simple text - for cap in soup.find_all('div', {'class': re.compile('vhb-article-caption')}): + for cap in soup.find_all('div', {'class': re.compile(r'vhb-article-caption')}): cap.replace_with(cap.encode_contents().decode('utf-8').strip() + ' ') for row in soup.find_all('div', {'class': 'vhb-article-author-row'}): for ul in row.find_all('ul'): @@ -160,7 +160,7 @@ class Handelsblatt(BasicNewsRecipe): fig.find('div', {'class': 'vhb-caption'}).replace_with(cap) # remove references to related articles for strong in soup.find_all('strong'): - if strong.string and (re.match('^Mehr:? ?', strong.string) or re.match('^>>.*', strong.string)): + if strong.string and (re.match(r'^Mehr:? ?', strong.string) or re.match(r'^>>.*', strong.string)): p_parent = strong.find_parent('p') if p_parent: p_parent.decompose() diff --git a/recipes/hankyoreh21.recipe b/recipes/hankyoreh21.recipe index 3113df68e7..f89476c9f9 100644 --- a/recipes/hankyoreh21.recipe +++ b/recipes/hankyoreh21.recipe @@ -36,4 +36,4 @@ class Hankyoreh21(BasicNewsRecipe): def get_article_url(self, article): org_url = BasicNewsRecipe.get_article_url(self, article) - return "http://h21.hani.co.kr" + org_url if org_url[0] == '/' else org_url + return 'http://h21.hani.co.kr' + org_url if org_url[0] == '/' else org_url diff --git a/recipes/hannoversche_zeitung.recipe b/recipes/hannoversche_zeitung.recipe index 9a2525e09a..5b33cec208 100644 --- a/recipes/hannoversche_zeitung.recipe +++ b/recipes/hannoversche_zeitung.recipe @@ -29,9 +29,8 @@ class AdvancedUserRecipe1287519083(BasicNewsRecipe): remove_tags_before = dict(id='modul_artikel') remove_tags_after = dict(id='articlecontent') - remove_tags = dict(id='articlesidebar') - remove_tags = [ + dict(id='articlesidebar'), dict(name='div', attrs={'class': ['articlecomment', 'articlebookmark', 'teaser_anzeige', 'teaser_umfrage', 'navigation', 'subnavigation']}) diff --git a/recipes/harpers.recipe b/recipes/harpers.recipe index ee83add22c..e8bd34366c 100644 --- a/recipes/harpers.recipe +++ b/recipes/harpers.recipe @@ -35,7 +35,6 @@ class Harpers(BasicNewsRecipe): remove_tags = [ classes('header-controls') ] - remove_attributes = ["style", "width", "height"] extra_css = ''' img {display:block; margin:0 auto;} @@ -64,8 +63,8 @@ class Harpers(BasicNewsRecipe): } def parse_index(self): - issues_soup = self.index_to_soup("https://harpers.org/issues/") - a_ele = issues_soup.select_one("div.issue-card a") + issues_soup = 
self.index_to_soup('https://harpers.org/issues/') + a_ele = issues_soup.select_one('div.issue-card a') self.timefmt = ' [' + self.tag_to_string(a_ele.find(attrs={'class':'issue-title'})) + ']' url = a_ele['href'] diff --git a/recipes/hbr.recipe b/recipes/hbr.recipe index 4dcde07c4e..8b326a18af 100644 --- a/recipes/hbr.recipe +++ b/recipes/hbr.recipe @@ -12,28 +12,28 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes class HBR(BasicNewsRecipe): - title = "Harvard Business Review" - __author__ = "unkn0wn, updated by ping" + title = 'Harvard Business Review' + __author__ = 'unkn0wn, updated by ping' description = ( - "Harvard Business Review is the leading destination for smart management thinking. " - "Through its flagship magazine, books, and digital content and tools published on HBR.org, " - "Harvard Business Review aims to provide professionals around the world with rigorous insights " - "and best practices to help lead themselves and their organizations more effectively and to " - "make a positive impact." + 'Harvard Business Review is the leading destination for smart management thinking. ' + 'Through its flagship magazine, books, and digital content and tools published on HBR.org, ' + 'Harvard Business Review aims to provide professionals around the world with rigorous insights ' + 'and best practices to help lead themselves and their organizations more effectively and to ' + 'make a positive impact.' ) - language = "en" - masthead_url = "https://hbr.org/resources/css/images/hbr_logo.svg" - publication_type = "magazine" - encoding = "utf-8" + language = 'en' + masthead_url = 'https://hbr.org/resources/css/images/hbr_logo.svg' + publication_type = 'magazine' + encoding = 'utf-8' remove_javascript = True no_stylesheets = True auto_cleanup = False compress_news_images = True - ignore_duplicate_articles = {"url"} - base_url = "https://hbr.org" + ignore_duplicate_articles = {'url'} + base_url = 'https://hbr.org' - remove_attributes = ["height", "width", "style"] - extra_css = """ + remove_attributes = ['height', 'width', 'style'] + extra_css = ''' h1.article-hed { font-size: x-large; margin-bottom: 0.4rem; } .article-dek { font-size: large; font-style: italic; margin-bottom: 1rem; } .article-byline { margin-top: 0.7rem; font-size: medium; font-style: normal; font-weight: bold; } @@ -50,35 +50,35 @@ class HBR(BasicNewsRecipe): padding-top: 0.5rem; font-style: italic; } - """ + ''' keep_only_tags = [ classes( - "headline-container article-dek-group pub-date hero-image-content " - "article-body standard-content" + 'headline-container article-dek-group pub-date hero-image-content ' + 'article-body standard-content' ), ] remove_tags = [ classes( - "left-rail--container translate-message follow-topic " - "newsletter-container by-prefix related-topics--common" + 'left-rail--container translate-message follow-topic ' + 'newsletter-container by-prefix related-topics--common' ), - dict(name=["article-sidebar"]), + dict(name=['article-sidebar']), ] def preprocess_raw_html(self, raw_html, article_url): soup = self.soup(raw_html) # break author byline out of list - byline_list = soup.find("ul", class_="article-byline-list") + byline_list = soup.find('ul', class_='article-byline-list') if byline_list: byline = byline_list.parent byline.append( - ", ".join( + ', '.join( [ self.tag_to_string(author) - for author in byline_list.find_all(class_="article-author") + for author in byline_list.find_all(class_='article-author') ] ) ) @@ -86,44 +86,44 @@ class HBR(BasicNewsRecipe): # Extract full 
article content content_ele = soup.find( - "content", + 'content', attrs={ - "data-index": True, - "data-page-year": True, - "data-page-month": True, - "data-page-seo-title": True, - "data-page-slug": True, + 'data-index': True, + 'data-page-year': True, + 'data-page-month': True, + 'data-page-seo-title': True, + 'data-page-slug': True, }, ) - endpoint_url = "https://hbr.org/api/article/piano/content?" + urlencode( + endpoint_url = 'https://hbr.org/api/article/piano/content?' + urlencode( { - "year": content_ele["data-page-year"], - "month": content_ele["data-page-month"], - "seotitle": content_ele["data-page-seo-title"], + 'year': content_ele['data-page-year'], + 'month': content_ele['data-page-month'], + 'seotitle': content_ele['data-page-seo-title'], } ) data = { - "contentKey": content_ele["data-index"], - "pageSlug": content_ele["data-page-slug"], + 'contentKey': content_ele['data-index'], + 'pageSlug': content_ele['data-page-slug'], } headers = { - "User-Agent": random_user_agent(), - "Pragma": "no-cache", - "Cache-Control": "no-cache", - "Content-Type": "application/json", - "Referer": article_url, + 'User-Agent': random_user_agent(), + 'Pragma': 'no-cache', + 'Cache-Control': 'no-cache', + 'Content-Type': 'application/json', + 'Referer': article_url, } br = browser() req = Request( endpoint_url, headers=headers, data=json.dumps(data), - method="POST", + method='POST', timeout=self.timeout, ) res = br.open(req) article = json.loads(res.read()) - new_soup = self.soup(article["content"]) + new_soup = self.soup(article['content']) # clear out existing partial content for c in list(content_ele.children): c.extract() # use extract() instead of decompose() because of strings @@ -140,52 +140,52 @@ class HBR(BasicNewsRecipe): def parse_index(self): d = self.recipe_specific_options.get('issue') if not (d and isinstance(d, str)): - soup = self.index_to_soup(f"{self.base_url}/magazine") - a = soup.find("a", href=lambda x: x and x.startswith("/archive-toc/")) - cov_url = a.find("img", attrs={"src": True})["src"] + soup = self.index_to_soup(f'{self.base_url}/magazine') + a = soup.find('a', href=lambda x: x and x.startswith('/archive-toc/')) + cov_url = a.find('img', attrs={'src': True})['src'] self.cover_url = urljoin(self.base_url, cov_url) - issue_url = urljoin(self.base_url, a["href"]) + issue_url = urljoin(self.base_url, a['href']) else: issue_url = 'https://hbr.org/archive-toc/BR' + d - mobj = re.search(r"archive-toc/(?P(BR)?\d+)\b", issue_url) + mobj = re.search(r'archive-toc/(?P(BR)?\d+)\b', issue_url) if mobj: self.cover_url = f'https://hbr.org/resources/images/covers/{mobj.group("issue")}_500.png' - self.log("Downloading issue:", issue_url) + self.log('Downloading issue:', issue_url) soup = self.index_to_soup(issue_url) - issue_title = soup.find("h1") + issue_title = soup.find('h1') if issue_title: - self.timefmt = f" [{self.tag_to_string(issue_title)}]" + self.timefmt = f' [{self.tag_to_string(issue_title)}]' feeds = OrderedDict() - for h3 in soup.find_all("h3", attrs={"class": "hed"}): - article_link_ele = h3.find("a") + for h3 in soup.find_all('h3', attrs={'class': 'hed'}): + article_link_ele = h3.find('a') if not article_link_ele: continue article_ele = h3.find_next_sibling( - "div", attrs={"class": "stream-item-info"} + 'div', attrs={'class': 'stream-item-info'} ) if not article_ele: continue title = self.tag_to_string(article_link_ele) - url = urljoin(self.base_url, article_link_ele["href"]) + url = urljoin(self.base_url, article_link_ele['href']) - authors_ele = 
article_ele.select("ul.byline li") - authors = ", ".join([self.tag_to_string(a) for a in authors_ele]) + authors_ele = article_ele.select('ul.byline li') + authors = ', '.join([self.tag_to_string(a) for a in authors_ele]) - article_desc = "" - dek_ele = h3.find_next_sibling("div", attrs={"class": "dek"}) + article_desc = '' + dek_ele = h3.find_next_sibling('div', attrs={'class': 'dek'}) if dek_ele: - article_desc = self.tag_to_string(dek_ele) + " | " + authors + article_desc = self.tag_to_string(dek_ele) + ' | ' + authors section_ele = ( - h3.findParent("li") - .find_previous_sibling("div", **classes("stream-section-label")) - .find("h4") + h3.findParent('li') + .find_previous_sibling('div', **classes('stream-section-label')) + .find('h4') ) section_title = self.tag_to_string(section_ele).title() feeds.setdefault(section_title, []).append( - {"title": title, "url": url, "description": article_desc} + {'title': title, 'url': url, 'description': article_desc} ) return feeds.items() diff --git a/recipes/heise.recipe b/recipes/heise.recipe index 886fad31cf..cae53ef938 100644 --- a/recipes/heise.recipe +++ b/recipes/heise.recipe @@ -68,4 +68,4 @@ class heiseDe(BasicNewsRecipe): ] def get_article_url(self, article): - return article.link + "&view=print" + return article.link + '&view=print' diff --git a/recipes/heise_ct.recipe b/recipes/heise_ct.recipe index 66ec19a02b..c48c29f9a6 100644 --- a/recipes/heise_ct.recipe +++ b/recipes/heise_ct.recipe @@ -108,7 +108,7 @@ class heise_select(BasicNewsRecipe): img = soup.new_tag('img', src=aimg['href'], alt=aimg['data-pswp-bu'], - style="display: block;") + style='display: block;') if img is not None: aimg.replaceWith(img) diff --git a/recipes/heise_ix.recipe b/recipes/heise_ix.recipe index dadbf1ac94..d4d7fcd1b3 100644 --- a/recipes/heise_ix.recipe +++ b/recipes/heise_ix.recipe @@ -109,7 +109,7 @@ class heise_select(BasicNewsRecipe): 'img', src=aimg['href'], alt=aimg['data-pswp-bu'], - style="display: block;" + style='display: block;' ) if img is not None: aimg.replaceWith(img) diff --git a/recipes/high_country_news.recipe b/recipes/high_country_news.recipe index 277a516d3a..1972c58b9e 100644 --- a/recipes/high_country_news.recipe +++ b/recipes/high_country_news.recipe @@ -50,7 +50,7 @@ class HighCountryNews(BasicNewsRecipe): auto_cleanup = False remove_javascript = True remove_empty_feeds = True - remove_attributes = ['width', 'height'] + remove_attributes = ['style', 'width', 'height'] use_embedded_content = False masthead_url = 'http://www.hcn.org/logo.jpg' @@ -69,8 +69,6 @@ class HighCountryNews(BasicNewsRecipe): # caption and credit for description & author of pictures # main-topic - remove_attributes = ['style'] - extra_css = ''' h1 {font-size: 1.6em; text-align: left} h2 {font-size: 1em; font-style: italic; font-weight: normal} diff --git a/recipes/himal_southasian.recipe b/recipes/himal_southasian.recipe index f384bd40b6..13621ac383 100644 --- a/recipes/himal_southasian.recipe +++ b/recipes/himal_southasian.recipe @@ -17,6 +17,7 @@ def get_story(story): for x in story['story-elements']: yield from get_story(x) + def img(img): yield '

' if 'image-s3-key' in img: @@ -25,6 +26,7 @@ def img(img): yield '

' + img['title'] + '
' yield '

' + class himal(BasicNewsRecipe): title = 'Himal Southasian' __author__ = 'unkn0wn' diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe index c1719195f8..8027919a81 100644 --- a/recipes/hindu.recipe +++ b/recipes/hindu.recipe @@ -17,7 +17,7 @@ def absurl(url): class TheHindu(BasicNewsRecipe): title = 'The Hindu' __author__ = 'unkn0wn' - description = 'Articles from The Hindu, Today\'s Paper.' + description = "Articles from The Hindu, Today's Paper." language = 'en_IN' no_stylesheets = True masthead_url = 'https://www.thehindu.com/theme/images/th-online/thehindu-logo.svg' @@ -133,7 +133,7 @@ class TheHindu(BasicNewsRecipe): url = absurl(item['href']) desc = 'Page no.' + item['pageno'] + ' | ' + item['teaser_text'] or '' self.log(' ', title, '\n\t', url) - feeds_dict[section].append({"title": title, "url": url, "description": desc}) - return [(section, articles) for section, articles in feeds_dict.items()] + feeds_dict[section].append({'title': title, 'url': url, 'description': desc}) + return list(feeds_dict.items()) else: return [] diff --git a/recipes/hindufeeds.recipe b/recipes/hindufeeds.recipe index 143cdcdd6a..8d6aba19b2 100644 --- a/recipes/hindufeeds.recipe +++ b/recipes/hindufeeds.recipe @@ -74,7 +74,6 @@ class TheHindufeeds(BasicNewsRecipe): src.extract() return soup - def get_cover_url(self): soup = self.index_to_soup('https://www.thehindu.com/todays-paper/') if cover := soup.find(attrs={'class':'hindu-ad'}): diff --git a/recipes/hindustan.recipe b/recipes/hindustan.recipe index 8ca4efe0bd..262ef267f1 100644 --- a/recipes/hindustan.recipe +++ b/recipes/hindustan.recipe @@ -28,7 +28,7 @@ class LiveHindustan(BasicNewsRecipe): remove_tags_after = [classes('stry-bdy')] feeds = [ - ('प्रमुख खबरें' ,'https://feed.livehindustan.com/rss/3127'), + ('प्रमुख खबरें', 'https://feed.livehindustan.com/rss/3127'), ('देश', 'https://feed.livehindustan.com/rss/4911'), ('विदेश', 'https://feed.livehindustan.com/rss/4913'), ('ओपिनियन', 'https://feed.livehindustan.com/rss/5165'), diff --git a/recipes/hindustan_times_print.recipe b/recipes/hindustan_times_print.recipe index e90d28a446..4fd78d0363 100644 --- a/recipes/hindustan_times_print.recipe +++ b/recipes/hindustan_times_print.recipe @@ -8,6 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe index = 'https://epaper.hindustantimes.com' + class ht(BasicNewsRecipe): title = 'Hindustan Times Print Edition' language = 'en_IN' @@ -97,9 +98,8 @@ class ht(BasicNewsRecipe): continue desc = page_no self.log('\t', title, ' ', desc) - feeds_dict[section].append({"title": title, "description": desc, "url": url}) - return [(section, articles) for section, articles in feeds_dict.items()] - + feeds_dict[section].append({'title': title, 'description': desc, 'url': url}) + return list(feeds_dict.items()) def preprocess_raw_html(self, raw, *a): data = json.loads(raw) diff --git a/recipes/historia_pl.recipe b/recipes/historia_pl.recipe index 2c36e78c94..173a7ac071 100644 --- a/recipes/historia_pl.recipe +++ b/recipes/historia_pl.recipe @@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Historia_org_pl(BasicNewsRecipe): title = u'Historia.org.pl' __author__ = 'fenuks' - description = u'Artykuły dotyczące historii w układzie epok i tematów, forum. Najlepsza strona historii. Matura z historii i egzamin gimnazjalny z historii.' # noqa + description = u'Artykuły dotyczące historii w układzie epok i tematów, forum. Najlepsza strona historii. Matura z historii i egzamin gimnazjalny z historii.' 
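The history_today hunk below, like the gva_be and handelsblatt ones above, turns regex pattern literals into raw strings. For patterns with no backslash the change is purely cosmetic; once an escape such as \- or \d appears, the raw prefix stops Python from interpreting the backslash itself (invalid string escapes have warned since Python 3.6, and flake8/ruff report them as W605). A short sketch, with the pattern text copied from that hunk:

import re

# The raw prefix makes the backslash part of the pattern explicitly, instead of
# relying on Python forwarding an unrecognized escape to the regex engine.
views_row = re.compile(r'views\-row.*')
assert views_row.match('views-row first')
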
# noqa: E501 cover_url = 'http://lh3.googleusercontent.com/_QeRQus12wGg/TOvHsZ2GN7I/AAAAAAAAD_o/LY1JZDnq7ro/logo5.jpg' category = 'history' language = 'pl' diff --git a/recipes/history_today.recipe b/recipes/history_today.recipe index eed6efad0a..43d55648b1 100644 --- a/recipes/history_today.recipe +++ b/recipes/history_today.recipe @@ -49,7 +49,7 @@ class HistoryToday(BasicNewsRecipe): # Go to issue soup = self.index_to_soup('https://www.historytoday.com/contents') cover = soup.find('div', attrs={ - 'id': 'content-area'}).find('img', attrs={'src': re.compile('.*cover.*')})['src'] + 'id': 'content-area'}).find('img', attrs={'src': re.compile(r'.*cover.*')})['src'] self.cover_url = cover self.log(self.cover_url) @@ -59,33 +59,30 @@ class HistoryToday(BasicNewsRecipe): feeds = OrderedDict() section_title = '' - for section in div.findAll('div', attrs={'id': re.compile(r"block\-views\-contents.*")}): + for section in div.findAll('div', attrs={'id': re.compile(r'block\-views\-contents.*')}): section_title = self.tag_to_string( section.find('h2', attrs={'class': 'title'})) sectionbody = section.find('div', attrs={'class': 'view-content'}) - for article in sectionbody.findAll('div', attrs={'class': re.compile(r"views\-row.*")}): + for article in sectionbody.findAll('div', attrs={'class': re.compile(r'views\-row.*')}): articles = [] subarticle = [] subarticle = article.findAll('div') if len(subarticle) < 2: continue title = self.tag_to_string(subarticle[0]) - originalurl = "https://www.historytoday.com" + \ - subarticle[0].span.a['href'].strip() + originalurl = 'https://www.historytoday.com' + subarticle[0].span.a['href'].strip() originalpage = self.index_to_soup(originalurl) - printurl = originalpage.find( - 'div', attrs={'id': 'ht-tools'}).a['href'].strip() - url = "https://www.historytoday.com" + printurl + printurl = originalpage.find('div', attrs={'id': 'ht-tools'}).a['href'].strip() + url = 'https://www.historytoday.com' + printurl desc = self.tag_to_string(subarticle[1]) - articles.append({'title': title, 'url': url, - 'description': desc, 'date': ''}) + articles.append({'title': title, 'url': url,'description': desc, 'date': ''}) if articles: if section_title not in feeds: feeds[section_title] = [] feeds[section_title] += articles - ans = [(key, val) for key, val in feeds.items()] + ans = list(feeds.items()) return ans def cleanup(self): diff --git a/recipes/hna.recipe b/recipes/hna.recipe index 814476dd9e..d84df3b31d 100644 --- a/recipes/hna.recipe +++ b/recipes/hna.recipe @@ -13,7 +13,6 @@ class hnaDe(BasicNewsRecipe): title = 'HNA' description = 'local news from Hessen/Germany' __author__ = 'Oliver Niesner' - use_embedded_content = False language = 'de' use_embedded_content = False timefmt = ' [%d %b %Y]' diff --git a/recipes/horizons.recipe b/recipes/horizons.recipe index baa007e5fd..880b4c230f 100644 --- a/recipes/horizons.recipe +++ b/recipes/horizons.recipe @@ -47,7 +47,7 @@ class horizons(BasicNewsRecipe): url = d else: soup = self.index_to_soup('https://www.cirsd.org/en/horizons') - a = soup.findAll('a', href=True, attrs={'class':'horizon-gallery-box'})[0] #use 1 for previous edition + a = soup.findAll('a', href=True, attrs={'class':'horizon-gallery-box'})[0] # use 1 for previous edition url = a['href'] if url.startswith('/'): url = 'https://www.cirsd.org' + url diff --git a/recipes/hot_cars.recipe b/recipes/hot_cars.recipe index bb7aa0cfd0..87a1d4ba7b 100644 --- a/recipes/hot_cars.recipe +++ b/recipes/hot_cars.recipe @@ -23,7 +23,7 @@ class 
AdvancedUserRecipe1718126839(BasicNewsRecipe): auto_cleanup = True feeds = [ - #Car Types + # Car Types ('Fast Cars', 'https://www.hotcars.com/feed/category/fast-cars/'), ('Classic Cars', 'https://www.hotcars.com/feed/tag/classic-cars/'), ('Muscle Cars', 'https://www.hotcars.com/feed/category/muscle-cars/'), @@ -32,13 +32,13 @@ class AdvancedUserRecipe1718126839(BasicNewsRecipe): ('Mass Market Cars', 'https://www.hotcars.com/feed/category/mass-market-cars/'), ('Luxury Cars', 'https://www.hotcars.com/feed/tag/luxury-cars/'), ('Motorcycles', 'https://www.hotcars.com/feed/category/motorcycles/'), - #Hot Cars Exclusives + # Hot Cars Exclusives ('Hot Cars Exclusives', 'https://www.hotcars.com/feed/category/hotcars-exclusives/'), ('Car Reviews', 'https://www.hotcars.com/feed/category/car-reviews/'), ('Car Guides', 'https://www.hotcars.com/feed/category/car-guides/'), ('Car Renders', 'https://www.hotcars.com/feed/category/car-renders/'), ('Hot Cars Awards', 'https://www.hotcars.com/feed/category/hotcars-awards/'), - #Car Culture + # Car Culture ('News', 'https://www.hotcars.com/feed/category/news/'), ('JDM Life', 'https://www.hotcars.com/feed/category/jdm-life/'), ('Car TV', 'https://www.hotcars.com/feed/category/cars-on-tv/'), diff --git a/recipes/howtogeek.recipe b/recipes/howtogeek.recipe index c60ee2d90d..abf1297e6a 100644 --- a/recipes/howtogeek.recipe +++ b/recipes/howtogeek.recipe @@ -20,18 +20,18 @@ class AdvancedUserRecipe1716091656(BasicNewsRecipe): auto_cleanup = True feeds = [ - #Desktop + # Desktop ('Desktop', 'https://www.howtogeek.com/feed/category/desktop/'), ('Windows', 'https://www.howtogeek.com/feed/category/windows/'), ('Mac', 'https://www.howtogeek.com/feed/category/mac/'), ('Linux', 'https://www.howtogeek.com/feed/category/linux/'), ('Chromebook', 'https://www.howtogeek.com/feed/category/chromebook-chrome-os/'), - #Mobile + # Mobile ('Mobile', 'https://www.howtogeek.com/feed/category/mobile/'), ('Android', 'https://www.howtogeek.com/feed/category/android/'), ('iOS', 'https://www.howtogeek.com/feed/tag/ios/'), ('Cellular Carriers', 'https://www.howtogeek.com/feed/category/cellular-carriers/'), - #Hardware + # Hardware ('Hardware', 'https://www.howtogeek.com/feed/category/hardware/'), ('Computer Hardware', 'https://www.howtogeek.com/feed/category/hardware/'), ('Wifi & Networking', 'https://www.howtogeek.com/feed/category/wifi-routers/'), @@ -39,7 +39,7 @@ class AdvancedUserRecipe1716091656(BasicNewsRecipe): ('eReaders', 'https://www.howtogeek.com/feed/category/ereaders/'), ('Audio', 'https://www.howtogeek.com/feed/category/audio/'), ('Televisions', 'https://www.howtogeek.com/feed/category/tv/'), - #Web + # Web ('Web', 'https://www.howtogeek.com/feed/category/web/'), ('Web Apps', 'https://www.howtogeek.com/feed/category/apps-web-apps/'), ('Social Media', 'https://www.howtogeek.com/feed/category/social-media/'), @@ -48,11 +48,11 @@ class AdvancedUserRecipe1716091656(BasicNewsRecipe): ('Google', 'https://www.howtogeek.com/feed/category/google/'), ('Microsoft', 'https://www.howtogeek.com/feed/category/microsoft/'), ('Privacy & Security', 'https://www.howtogeek.com/feed/category/privacy-security/'), - #Cutting Edge + # Cutting Edge ('Cutting Edge', 'https://www.howtogeek.com/feed/category/cutting-edge/'), ('Virtual Reality', 'https://www.howtogeek.com/feed/category/virtual-reality/'), ('AI & Machine Learning', 'https://www.howtogeek.com/feed/category/ai-machine-learning/'), - #Lifestyle + # Lifestyle ('Lifestyle', 'https://www.howtogeek.com/feed/category/lifestyle/'), ('Gaming', 
'https://www.howtogeek.com/feed/category/video-games/'), ('Streaming', 'https://www.howtogeek.com/feed/category/cord-cutting-streaming/'), @@ -63,7 +63,7 @@ class AdvancedUserRecipe1716091656(BasicNewsRecipe): ('Buying Guides', 'https://www.howtogeek.com/feed/buying-guides/'), ('Deals', 'https://www.howtogeek.com/feed/tag/deals/'), - #Discontinued Feeds - #('Electric Vehicles', 'https://www.howtogeek.com/feed/category/electric-vehicles/'), - #('Cryptocurrency', 'https://www.howtogeek.com/feed/category/cryptocurrency/'), + # Discontinued Feeds + # ('Electric Vehicles', 'https://www.howtogeek.com/feed/category/electric-vehicles/'), + # ('Cryptocurrency', 'https://www.howtogeek.com/feed/category/cryptocurrency/'), ] diff --git a/recipes/hoy.recipe b/recipes/hoy.recipe index 8811b0e079..dd40c12488 100644 --- a/recipes/hoy.recipe +++ b/recipes/hoy.recipe @@ -69,7 +69,7 @@ class Hoy(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['dir'] = self.direction mcharset = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) + ('http-equiv', 'Content-Type'), ('content', 'text/html; charset=utf-8')]) soup.head.insert(0, mcharset) for item in soup.findAll(style=True): del item['style'] diff --git a/recipes/hurriyet.recipe b/recipes/hurriyet.recipe index 26577b4faa..11dead19b0 100644 --- a/recipes/hurriyet.recipe +++ b/recipes/hurriyet.recipe @@ -35,8 +35,8 @@ class Hurriyet(BasicNewsRecipe): compress_news_images = True # some mild formatting - extra_css = """.news-media { clear: left; } - .news-detail-title { clear:left; }""" + extra_css = '''.news-media { clear: left; } + .news-detail-title { clear:left; }''' keep_only_tags = [ # title @@ -64,4 +64,3 @@ class Hurriyet(BasicNewsRecipe): (u'Ankara', 'https://www.hurriyet.com.tr/rss/ankara'), (u'Ege', 'https://www.hurriyet.com.tr/rss/ege') ] - diff --git a/recipes/id_pixel.recipe b/recipes/id_pixel.recipe index eb74e1efd6..dcd53fe79b 100644 --- a/recipes/id_pixel.recipe +++ b/recipes/id_pixel.recipe @@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class IdPixel(BasicNewsRecipe): title = '\u0418\u0434\u0435\u0430\u043B\u044C\u043D\u044B\u0439 \u043F\u0438\u043A\u0441\u0435\u043B\u044C' cover_url = u'https://idpixel.ru/i/logo2x.png' - description = '\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0439 \u043F\u0440\u043E\u0435\u043A\u0442 \u043E \u0440\u0435\u0442\u0440\u043E-\u0438\u0433\u0440\u0430\u0445 \u0438 \u0440\u0435\u0442\u0440\u043E-\u0442\u0435\u0445\u043D\u0438\u043A\u0435. \u0412\u043E\u0441\u044C\u043C\u0438\u0431\u0438\u0442\u043D\u044B\u0435 \u0438\u0433\u0440\u044B, \u0448\u0435\u0441\u0442\u043D\u0430\u0434\u0446\u0430\u0442\u0438\u0431\u0438\u0442\u043D\u044B\u0435 \u043A\u043E\u043D\u0441\u043E\u043B\u0438, \u0434\u043E\u043C\u0430\u0448\u043D\u0438\u0435 \u043A\u043E\u043C\u043F\u044C\u044E\u0442\u0435\u0440\u044B \u0441 \u0438\u0433\u0440\u0430\u043C\u0438 \u043D\u0430 \u043A\u0430\u0441\u0441\u0435\u0442\u0430\u0445 \u0438 \u0442\u0430\u043A \u0434\u0430\u043B\u0435\u0435. \u041C\u044B \u0438\u0449\u0435\u043C \u0440\u0435\u0442\u0440\u043E-\u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u0441\u0432\u0435\u0442\u0443 \u0438 \u0434\u043E\u043D\u043E\u0441\u0438\u043C \u0438\u0445 \u0434\u043E \u0432\u0430\u0441.' 
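The hot_cars and howtogeek feed lists above receive the block-comment spacing fix: '#Car Types' becomes '# Car Types', which is what pycodestyle's E265 asks for. A one-item sketch of the convention, with the first feed tuple copied from the hot_cars hunk:

feeds = [
    # Car Types (a space after '#' satisfies E265; '#Car Types' would not)
    ('Fast Cars', 'https://www.hotcars.com/feed/category/fast-cars/'),
]
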
# noqa + description = '\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0439 \u043F\u0440\u043E\u0435\u043A\u0442 \u043E \u0440\u0435\u0442\u0440\u043E-\u0438\u0433\u0440\u0430\u0445 \u0438 \u0440\u0435\u0442\u0440\u043E-\u0442\u0435\u0445\u043D\u0438\u043A\u0435. \u0412\u043E\u0441\u044C\u043C\u0438\u0431\u0438\u0442\u043D\u044B\u0435 \u0438\u0433\u0440\u044B, \u0448\u0435\u0441\u0442\u043D\u0430\u0434\u0446\u0430\u0442\u0438\u0431\u0438\u0442\u043D\u044B\u0435 \u043A\u043E\u043D\u0441\u043E\u043B\u0438, \u0434\u043E\u043C\u0430\u0448\u043D\u0438\u0435 \u043A\u043E\u043C\u043F\u044C\u044E\u0442\u0435\u0440\u044B \u0441 \u0438\u0433\u0440\u0430\u043C\u0438 \u043D\u0430 \u043A\u0430\u0441\u0441\u0435\u0442\u0430\u0445 \u0438 \u0442\u0430\u043A \u0434\u0430\u043B\u0435\u0435. \u041C\u044B \u0438\u0449\u0435\u043C \u0440\u0435\u0442\u0440\u043E-\u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u0441\u0432\u0435\u0442\u0443 \u0438 \u0434\u043E\u043D\u043E\u0441\u0438\u043C \u0438\u0445 \u0434\u043E \u0432\u0430\u0441.' # noqa: E501 publisher = '\u041C\u0438\u0445\u0430\u0438\u043B \u0421\u0443\u0434\u0430\u043A\u043E\u0432' category = 'news' __author__ = 'bugmen00t' diff --git a/recipes/idnes.recipe b/recipes/idnes.recipe index af359f2ec2..3375979f0a 100644 --- a/recipes/idnes.recipe +++ b/recipes/idnes.recipe @@ -26,9 +26,8 @@ class iHeuteRecipe(BasicNewsRecipe): no_stylesheets = True remove_attributes = ['width', 'height'] - remove_tags = [dict(name='div', attrs={'id': ['zooming']}), - dict(name='div', attrs={ - 'class': ['related', 'mapa-wrapper']}), + remove_tags = [dict(name='div', attrs={'id': ['zooming']}), + dict(name='div', attrs={'class': ['related', 'mapa-wrapper']}), dict(name='table', attrs={'id': ['opener-img', 'portal']}), dict(name='table', attrs={'class': ['video-16ku9']})] remove_tags_after = [ @@ -39,7 +38,7 @@ class iHeuteRecipe(BasicNewsRecipe): def print_version(self, url): print_url = url - split_url = url.split("?") + split_url = url.split('?') if (split_url[0].rfind('dilbert.asp') != -1): # dilbert komix print_url = print_url.replace('.htm', '.gif&tisk=1') print_url = print_url.replace('.asp', '.aspx') diff --git a/recipes/ieee_spectrum_mag.recipe b/recipes/ieee_spectrum_mag.recipe index 11c65b91f2..f0a590f0fb 100644 --- a/recipes/ieee_spectrum_mag.recipe +++ b/recipes/ieee_spectrum_mag.recipe @@ -7,18 +7,18 @@ from calibre.web.feeds.news import BasicNewsRecipe class IEEESpectrumMagazine(BasicNewsRecipe): - title = "IEEE Spectrum Magazine" + title = 'IEEE Spectrum Magazine' language = 'en' __author__ = 'yodha8' - description = "Published on day 1 of every month." + description = 'Published on day 1 of every month.' oldest_article = 120 # Mag gathers articles published older than a month online. So we scan for 4 months in the feed. 
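The IEEE Spectrum hunk continuing below keeps its logic intact while normalizing quotes: the recipe derives the current month's feed and cover URLs from the date at run time. A self-contained sketch of that derivation; the URL shapes are copied from the hunk and are the recipe's own assumption about the site, not a documented API:

import datetime

# Build this month's magazine feed and cover-page URLs from the current date.
now = datetime.datetime.now()
year_month = now.strftime('%Y/%B').lower()    # e.g. '2024/june'
month_feed_url = 'https://spectrum.ieee.org/feeds/magazine/{}.rss'.format(year_month)
month_url = 'https://spectrum.ieee.org/magazine/{}'.format(year_month)
print(month_feed_url, month_url)
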
max_articles_per_feed = 100 auto_cleanup = True # RSS feed for the current month now = datetime.datetime.now() - year_month = now.strftime("%Y/%B").lower() - month_feed_url = "https://spectrum.ieee.org/feeds/magazine/{}.rss".format( + year_month = now.strftime('%Y/%B').lower() + month_feed_url = 'https://spectrum.ieee.org/feeds/magazine/{}.rss'.format( year_month ) @@ -27,9 +27,9 @@ class IEEESpectrumMagazine(BasicNewsRecipe): ] def get_cover_url(self): - """Go to this month's URL and pull cover image from there.""" - month_url = "https://spectrum.ieee.org/magazine/{}".format(self.year_month) + '''Go to this month's URL and pull cover image from there.''' + month_url = 'https://spectrum.ieee.org/magazine/{}'.format(self.year_month) soup = self.index_to_soup(month_url) - img_meta = soup.find("meta", property="og:image") - img_url = img_meta["content"] + img_meta = soup.find('meta', property='og:image') + img_url = img_meta['content'] return img_url diff --git a/recipes/ifzm.recipe b/recipes/ifzm.recipe index b9c962eab5..8cc96344f8 100644 --- a/recipes/ifzm.recipe +++ b/recipes/ifzm.recipe @@ -8,6 +8,7 @@ def absurl(url): if url.startswith('/'): return 'https://www.infzm.com' + url + def json_to_html(raw, link): data = json.loads(raw) data = data['data']['content'] @@ -39,19 +40,18 @@ class infzm(BasicNewsRecipe): def get_obfuscated_article(self, url): br = self.get_browser() link = url - res_link = link.replace('https://www.infzm.com', 'https://api.infzm.com/mobile') \ - + '?platform=wap&version=1.89.0&machine_id=35458aa29603f2b246636e5492122b50&user_id=&token=&member_type=' + res_link = (link.replace('https://www.infzm.com', 'https://api.infzm.com/mobile') + + '?platform=wap&version=1.89.0&machine_id=35458aa29603f2b246636e5492122b50&user_id=&token=&member_type=') # if article is paywalled, add code to figure out machine_id raw = br.open(res_link).read() html = json_to_html(raw, link) - return ({ 'data': html, 'url': link }) + return ({'data': html, 'url': link}) extra_css = ''' img {display:block; margin:0 auto;} .cm_pic_caption, .cm_pic_author { font-size:small; text-align:center; } ''' - def parse_index(self): index = 'https://www.infzm.com/' sections = [ diff --git a/recipes/il_fatto.recipe b/recipes/il_fatto.recipe index 66af6e3d2f..800749978d 100644 --- a/recipes/il_fatto.recipe +++ b/recipes/il_fatto.recipe @@ -10,29 +10,21 @@ class AdvancedUserRecipe1286477122(BasicNewsRecipe): __author__ = 'egilh' feeds = [ - (u'Politica & Palazzo', - u'http://www.ilfattoquotidiano.it/category/politica-palazzo/feed/'), - (u'Giustizia & impunit\xe0', - u'http://www.ilfattoquotidiano.it/category/giustizia-impunita/feed/'), - (u'Media & regime', u'http://www.ilfattoquotidiano.it/category/media-regime/feed/'), - (u'Economia & Lobby', - u'http://www.ilfattoquotidiano.it/category/economia-lobby/feed/'), - (u'Lavoro & precari', - u'http://www.ilfattoquotidiano.it/category/lavoro-precari/feed/'), - (u'Ambiente & Veleni', - u'http://www.ilfattoquotidiano.it/category/ambiente-veleni/feed/'), - (u'Sport & miliardi', - u'http://www.ilfattoquotidiano.it/category/sport-miliardi/feed/'), + (u'Politica & Palazzo', u'http://www.ilfattoquotidiano.it/category/politica-palazzo/feed/'), + (u'Giustizia & impunit\xe0', u'http://www.ilfattoquotidiano.it/category/giustizia-impunita/feed/'), + (u'Media & regime', u'http://www.ilfattoquotidiano.it/category/media-regime/feed/'), + (u'Economia & Lobby', u'http://www.ilfattoquotidiano.it/category/economia-lobby/feed/'), + (u'Lavoro & precari', 
u'http://www.ilfattoquotidiano.it/category/lavoro-precari/feed/'), + (u'Ambiente & Veleni', u'http://www.ilfattoquotidiano.it/category/ambiente-veleni/feed/'), + (u'Sport & miliardi', u'http://www.ilfattoquotidiano.it/category/sport-miliardi/feed/'), (u'Cronaca', u'http://www.ilfattoquotidiano.it/category/cronaca/feed/'), (u'Mondo', u'http://www.ilfattoquotidiano.it/category/mondo/feed/'), - (u'Societ\xe0', u'http://www.ilfattoquotidiano.it/category/societa/feed/'), + (u'Societ\xe0', u'http://www.ilfattoquotidiano.it/category/societa/feed/'), (u'Scuola', u'http://www.ilfattoquotidiano.it/category/scuola/feed/'), (u'Tecno', u'http://www.ilfattoquotidiano.it/category/tecno/feed/'), (u'Terza pagina', u'http://www.ilfattoquotidiano.it/category/terza-pagina/feed/'), - (u'Piacere quotidiano', - u'http://www.ilfattoquotidiano.it/category/piacere-quotidiano/feed/'), - (u'Cervelli in fuga', - u'http://www.ilfattoquotidiano.it/category/cervelli-in-fuga/feed/'), + (u'Piacere quotidiano', u'http://www.ilfattoquotidiano.it/category/piacere-quotidiano/feed/'), + (u'Cervelli in fuga', u'http://www.ilfattoquotidiano.it/category/cervelli-in-fuga/feed/'), (u'Documentati!', u'http://www.ilfattoquotidiano.it/category/documentati/feed/'), (u'Misfatto', u'http://www.ilfattoquotidiano.it/category/misfatto/feed/') ] diff --git a/recipes/il_messaggero.recipe b/recipes/il_messaggero.recipe index ad6ac66af7..26feeb0153 100644 --- a/recipes/il_messaggero.recipe +++ b/recipes/il_messaggero.recipe @@ -16,7 +16,7 @@ class IlMessaggero(BasicNewsRecipe): __author__ = 'Gabriele Marini' description = 'Italian News' -# cover_url = 'http://www.ilmessaggero.it/img_tst/logomsgr.gif' + # cover_url = 'http://www.ilmessaggero.it/img_tst/logomsgr.gif' title = u'Il Messaggero' publisher = 'Caltagirone Editore' category = 'News, politics, culture, economy, general interest' @@ -36,22 +36,21 @@ class IlMessaggero(BasicNewsRecipe): dict(name='h2', attrs={ 'class': ['sottotitLettura', 'grigio16']}), dict(name='span', attrs={'class': 'testoArticoloG'}), - dict(name='div', attrs={'id': 'testodim'}) + dict(name='div', attrs={'id': 'testodim'}) ] def get_cover_url(self): cover = None st = time.localtime() year = str(st.tm_year) - month = "%.2d" % st.tm_mon - day = "%.2d" % st.tm_mday - cover = 'http://carta.ilmessaggero.it/' + year + \ - month + day + '/jpeg/MSGR_20_CITTA_1.jpg' + month = '%.2d' % st.tm_mon + day = '%.2d' % st.tm_mday + cover = 'http://carta.ilmessaggero.it/' + year + month + day + '/jpeg/MSGR_20_CITTA_1.jpg' br = BasicNewsRecipe.get_browser(self) try: br.open(cover) except: - self.log("\nCover unavailable") + self.log('\nCover unavailable') cover = 'http://www.ilmessaggero.it/img_tst/logomsgr.gif' return cover diff --git a/recipes/il_post.recipe b/recipes/il_post.recipe index 42a08d5964..9fa8a31638 100644 --- a/recipes/il_post.recipe +++ b/recipes/il_post.recipe @@ -14,27 +14,27 @@ from datetime import date, timedelta from calibre.web.feeds.news import BasicNewsRecipe -dates = [ date.today().strftime('%Y/%m/%d'), (date.today() - timedelta(1)).strftime('%Y/%m/%d') ] +dates = [date.today().strftime('%Y/%m/%d'), (date.today() - timedelta(1)).strftime('%Y/%m/%d')] # ----------- CUSTOMIZATION OPTIONS START ----------- # Comment (add # in front) to disable the sections you are not interested in # Commenta (aggiungi # davanti alla riga) per disabilitare le sezioni che non vuoi scaricare sections = [ - ("Italia", "https://www.ilpost.it/italia/"), - ("Mondo", "https://www.ilpost.it/mondo/"), - ("Politica", 
"https://www.ilpost.it/politica/"), - ("Tecnologia", "https://www.ilpost.it/tecnologia/"), - ("Internet", "https://www.ilpost.it/internet/"), - ("Scienza", "https://www.ilpost.it/scienza/"), - ("Cultura", "https://www.ilpost.it/cultura/"), - ("Economia", "https://www.ilpost.it/economia/"), - ("Sport", "https://www.ilpost.it/sport/"), - ("Media", "https://www.ilpost.it/media/"), - ("Moda", "https://www.ilpost.it/moda/"), - ("Libri", "https://www.ilpost.it/libri/"), - ("Auto", "https://www.ilpost.it/auto/"), - ("Konrad", "https://www.ilpost.it/europa/"), + ('Italia', 'https://www.ilpost.it/italia/'), + ('Mondo', 'https://www.ilpost.it/mondo/'), + ('Politica', 'https://www.ilpost.it/politica/'), + ('Tecnologia', 'https://www.ilpost.it/tecnologia/'), + ('Internet', 'https://www.ilpost.it/internet/'), + ('Scienza', 'https://www.ilpost.it/scienza/'), + ('Cultura', 'https://www.ilpost.it/cultura/'), + ('Economia', 'https://www.ilpost.it/economia/'), + ('Sport', 'https://www.ilpost.it/sport/'), + ('Media', 'https://www.ilpost.it/media/'), + ('Moda', 'https://www.ilpost.it/moda/'), + ('Libri', 'https://www.ilpost.it/libri/'), + ('Auto', 'https://www.ilpost.it/auto/'), + ('Konrad', 'https://www.ilpost.it/europa/'), ] # ----------- CUSTOMIZATION OPTIONS OVER ----------- @@ -45,16 +45,16 @@ class IlPost(BasicNewsRecipe): __license__ = 'GPL v3' __copyright__ = '2019, Marco Scirea ' - title = "Il Post" - language = "it" + title = 'Il Post' + language = 'it' description = ( 'Puoi decidere quali sezioni scaricare modificando la ricetta.' ' Di default le immagini sono convertite in scala di grigio per risparmiare spazio,' - ' la ricetta puo\' essere configurata per tenerle a colori' + " la ricetta puo' essere configurata per tenerle a colori" ) - tags = "news" + tags = 'news' masthead_url = 'https://www.ilpost.it/error/images/ilpost.svg' - ignore_duplicate_articles = {"title", "url"} + ignore_duplicate_articles = {'title', 'url'} no_stylesheets = True extra_css = ' .wp-caption-text { font-size:small; } ' keep_only_tags = [dict(name='main', attrs={'id':lambda x: x and x.startswith('index_main-content__')})] @@ -81,11 +81,11 @@ class IlPost(BasicNewsRecipe): continue self.log('\t', title) entries.append({ - "url": link["href"], - "title": title, - "description": desc + 'url': link['href'], + 'title': title, + 'description': desc }) - return (name, entries) + return name, entries def parse_index(self): feeds = [] diff --git a/recipes/ilmanifesto.recipe b/recipes/ilmanifesto.recipe index 6ea5a50ca7..5ae8ff772e 100644 --- a/recipes/ilmanifesto.recipe +++ b/recipes/ilmanifesto.recipe @@ -34,7 +34,7 @@ class IlManifesto(BasicNewsRecipe): startSoup = self.index_to_soup(startUrl) lastEdition = startSoup.findAll('div', id='accordion_inedicola')[ 1].find('a')['href'] - del(startSoup) + del startSoup self.manifesto_index = MANIFESTO_BASEURL + lastEdition urlsplit = lastEdition.split('/') self.manifesto_datestr = urlsplit[-1] @@ -43,8 +43,7 @@ class IlManifesto(BasicNewsRecipe): def get_cover_url(self): self._set_manifesto_index() - url = MANIFESTO_BASEURL + \ - 'fileadmin/archivi/in_edicola/%sprimapagina.gif' % self.manifesto_datestr + url = MANIFESTO_BASEURL + 'fileadmin/archivi/in_edicola/%sprimapagina.gif' % self.manifesto_datestr return url def parse_index(self): @@ -105,6 +104,6 @@ class IlManifesto(BasicNewsRecipe): if sommNode is not None: summary = sommNode - template = "%(title)s

%(title)s

%(subtitle)s

%(author)s

%(summary)s
%(content)s
" # noqa - del(bs) + template = "%(title)s

%(title)s

%(subtitle)s

%(author)s

%(summary)s
%(content)s
" # noqa: E501 + del bs return template % dict(title=title, subtitle=subtitle, author=author, summary=summary, content=content) diff --git a/recipes/ilsole24ore.recipe b/recipes/ilsole24ore.recipe index e8b678155b..38a5f8d77b 100644 --- a/recipes/ilsole24ore.recipe +++ b/recipes/ilsole24ore.recipe @@ -44,13 +44,13 @@ class IlSole24Ore(BasicNewsRecipe): link = article.get('link', None) if link is None: return article - if link.split('/')[-1] == "story01.htm": + if link.split('/')[-1] == 'story01.htm': link = link.split('/')[-2] a = ['0B', '0C', '0D', '0E', '0F', '0G', '0N', '0L0S', '0A'] b = ['.', '/', '?', '-', '=', '&', '.com', 'www.', '0'] - for i in range(0, len(a)): + for i in range(len(a)): link = link.replace(a[i], b[i]) - link = "http://" + link + link = 'http://' + link return link feeds = [ diff --git a/recipes/in4_pl.recipe b/recipes/in4_pl.recipe index b80b1255a9..b4b8331b35 100644 --- a/recipes/in4_pl.recipe +++ b/recipes/in4_pl.recipe @@ -16,7 +16,7 @@ class in4(BasicNewsRecipe): no_stylesheets = True remove_empty_feeds = True preprocess_regexps = [ - (re.compile(u'', re.DOTALL), lambda match: '')] keep_only_tags = [dict(name='div', attrs={'class': 'left_alone'})] remove_tags_after = dict(name='img', attrs={'title': 'komentarze'}) remove_tags = [dict(name='img', attrs={'title': 'komentarze'})] diff --git a/recipes/in_gr.recipe b/recipes/in_gr.recipe index e9de1d44a8..7354d7cc18 100644 --- a/recipes/in_gr.recipe +++ b/recipes/in_gr.recipe @@ -5,7 +5,7 @@ class ingr(BasicNewsRecipe): title = 'in.gr' __author__ = 'Stelios' description = 'News from Greece' -# max_articles_per_feed = 100 + # max_articles_per_feed = 100 oldest_article = 4 publisher = 'in.gr' category = 'news, GR' @@ -14,7 +14,6 @@ class ingr(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False remove_empty_feeds = True - encoding = 'utf8' keep_only_tags = [ dict(name='h1'), diff --git a/recipes/inc.recipe b/recipes/inc.recipe index f1dfa19c2a..501bff65e8 100644 --- a/recipes/inc.recipe +++ b/recipes/inc.recipe @@ -40,7 +40,7 @@ class IncMagazineRecipe(BasicNewsRecipe): def get_browser(self): def has_login_name(form): try: - form.find_control(name="email") + form.find_control(name='email') except: return False else: diff --git a/recipes/inc42.recipe b/recipes/inc42.recipe index ec43ebdfbe..b5b10066e0 100644 --- a/recipes/inc42.recipe +++ b/recipes/inc42.recipe @@ -47,7 +47,6 @@ class inc42(BasicNewsRecipe): feeds.append((section, articles)) return feeds - def preprocess_html(self, soup): for img in soup.findAll('img', attrs={'data-src':True}): img['src'] = img['data-src'] diff --git a/recipes/independent.recipe b/recipes/independent.recipe index 6076d506dd..53d58b1298 100644 --- a/recipes/independent.recipe +++ b/recipes/independent.recipe @@ -22,15 +22,14 @@ class TheIndependentNew(BasicNewsRecipe): title = u'The Independent' __author__ = 'Krittika Goyal' - description = 'The latest in UK News and World News from The \ - Independent. Wide range of international and local news, sports \ - news, commentary and opinion pieces.Independent News - Breaking news \ - that matters. Your daily comprehensive news source - The \ - Independent Newspaper' + description = ('The latest in UK News and World News from The ' + 'Independent. Wide range of international and local news, sports ' + 'news, commentary and opinion pieces.Independent News - Breaking news ' + 'that matters. 
Your daily comprehensive news source - The ' + 'Independent Newspaper') publisher = 'The Independent' oldest_article = 2.0 ignore_duplicate_articles = {'title', 'url'} - remove_empty_feeds = True category = 'news, UK' no_stylesheets = True use_embedded_content = False diff --git a/recipes/independent_australia.recipe b/recipes/independent_australia.recipe index 1e2f5148da..f0968398dc 100644 --- a/recipes/independent_australia.recipe +++ b/recipes/independent_australia.recipe @@ -20,8 +20,6 @@ class IndependentAustralia(BasicNewsRecipe): description = ( 'Independent Australia is a progressive journal focusing on politics, democracy, the environment, Australian history and Australian identity.' ' It contains news and opinion from Australia and around the world.') - oldest_article = 7 # days - max_articles_per_feed = 100 feeds = [ ( @@ -46,7 +44,7 @@ class IndependentAustralia(BasicNewsRecipe): remove_javascript = True keep_only_tags = [ - dict(name='div', attrs={'class': "art-display"}) + dict(name='div', attrs={'class': 'art-display'}) ] # the article content is contained in # ************************************ @@ -67,13 +65,6 @@ class IndependentAustralia(BasicNewsRecipe): extra_css = '.byline{font-size:smaller;margin-bottom:10px;}.inline-caption{display:block;font-size:smaller;text-decoration: none;}' compress_news_images = True - feeds = [ - ( - 'Independent Australia', - 'https://feeds.feedburner.com/IndependentAustralia' - ), - ] - # ************************************ # Break up feed into categories (based on BrianG's code snippet): # ************************************ diff --git a/recipes/india_speaks_reddit.recipe b/recipes/india_speaks_reddit.recipe index 5866dc379f..c9b9c67bef 100644 --- a/recipes/india_speaks_reddit.recipe +++ b/recipes/india_speaks_reddit.recipe @@ -20,4 +20,3 @@ class IndiaSpeaksReddit(BasicNewsRecipe): feeds = [ ('India Speaks Reddit main feed', 'https://www.reddit.com/r/IndiaSpeaks.rss'), ] - diff --git a/recipes/india_today.recipe b/recipes/india_today.recipe index 3f2c960a3b..8654a99ef6 100644 --- a/recipes/india_today.recipe +++ b/recipes/india_today.recipe @@ -75,7 +75,7 @@ class IndiaToday(BasicNewsRecipe): section = x[0] try: return ( - 'Editor\'s Note', 'Cover Story', 'The Big Story', 'Upfront', + "Editor's Note", 'Cover Story', 'The Big Story', 'Upfront', 'NATION', 'INTERVIEW' ).index(section) except Exception: @@ -89,7 +89,7 @@ class IndiaToday(BasicNewsRecipe): return soup def preprocess_raw_html(self, raw, *a): - m = re.search('id="__NEXT_DATA__" type="application/json">', raw) + m = re.search(r'id="__NEXT_DATA__" type="application/json">', raw) raw = raw[m.start():] raw = raw.split('>', 1)[1] data = json.JSONDecoder().raw_decode(raw)[0] @@ -114,7 +114,6 @@ class IndiaToday(BasicNewsRecipe): if 'image_caption' in data: imagecap = '
' + data['image_caption'] + '
' - html = '' + slug + '

' + title + '

\n' + desc + '
'\ - + author + ' ' + city + ' UPDATED: ' + date + '
\n' + image + imagecap + body\ - + '' - return html + return ('' + slug + '

' + title + '

\n' + desc + '
' + + author + ' ' + city + ' UPDATED: ' + date + '
\n' + image + imagecap + body + + '') diff --git a/recipes/indian_express.recipe b/recipes/indian_express.recipe index cd94f157cc..6a185992d4 100644 --- a/recipes/indian_express.recipe +++ b/recipes/indian_express.recipe @@ -136,13 +136,13 @@ class IndianExpress(BasicNewsRecipe): return citem['content'].replace('300', '600') def preprocess_html(self, soup): - if h2 := (soup.find(attrs={"itemprop": "description"}) or soup.find(**classes("synopsis"))): + if h2 := (soup.find(attrs={'itemprop': 'description'}) or soup.find(**classes('synopsis'))): h2.name = 'p' h2['id'] = 'sub-d' for span in soup.findAll( - "span", attrs={"class": ["ie-custom-caption", "custom-caption"]} + 'span', attrs={'class': ['ie-custom-caption', 'custom-caption']} ): - span["id"] = "img-cap" + span['id'] = 'img-cap' for img in soup.findAll('img', attrs={'data-src': True}): img['src'] = img['data-src'] if span := soup.find('span', content=True, attrs={'itemprop': 'dateModified'}): diff --git a/recipes/ing_dk.recipe b/recipes/ing_dk.recipe index e75e24d5a8..ff47f9da78 100644 --- a/recipes/ing_dk.recipe +++ b/recipes/ing_dk.recipe @@ -21,9 +21,9 @@ class Ing_dk(BasicNewsRecipe): auto_cleanup = True keep_only_tags = [ - dict(name="div", attrs={'class': 'menu-article-current-title'}), - dict(name="section", attrs={'class': 'byline'}), - dict(name="section", attrs={'class': 'body'}), + dict(name='div', attrs={'class': 'menu-article-current-title'}), + dict(name='section', attrs={'class': 'byline'}), + dict(name='section', attrs={'class': 'body'}), ] feeds = [ diff --git a/recipes/inquirer_net.recipe b/recipes/inquirer_net.recipe index 177a2d2fb3..00227b323f 100644 --- a/recipes/inquirer_net.recipe +++ b/recipes/inquirer_net.recipe @@ -15,7 +15,6 @@ class InquirerNet(BasicNewsRecipe): description = 'News from Philipines' oldest_article = 2 max_articles_per_feed = 100 - no_stylesheets = True use_embedded_content = False encoding = 'utf8' publisher = 'inquirer.net' @@ -23,8 +22,6 @@ class InquirerNet(BasicNewsRecipe): lang = 'en' language = 'en' - use_embedded_content = False - no_stylesheets = True auto_cleanup = True diff --git a/recipes/instapaper.recipe b/recipes/instapaper.recipe index 0d91d6dcc5..c7ecf69381 100644 --- a/recipes/instapaper.recipe +++ b/recipes/instapaper.recipe @@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe # The Gutenweb stylesheet from https://www.mobileread.com/forums/showpost.php?p=2809828&postcount=31 -gutenweb = """"html,body,div,span,applet,object,iframe,h1,h2,h3,h4,h5,h6,p,blockquote,pre,a,abbr,acronym,address,big,cite,code,del,dfn,em,img,ins,kbd,q,s,samp,small,strike,strong,sub,sup,tt,var,b,u,i,center,dl,dt,dd,ol,ul,li,fieldset,form,label,legend,table,caption,tbody,tfoot,thead,tr,th,td,article,aside,canvas,details,embed,figure,figcaption,footer,header,hgroup,menu,nav,output,ruby,section,summary,time,mark,audio,video{margin:0;padding:0;border:0;font-size:100%;font:inherit;vertical-align:baseline}article,aside,details,figcaption,figure,footer,header,hgroup,menu,nav,section{display:block}body{line-height:1}ol,ul{list-style:none}blockquote,q{quotes:none}blockquote:before,blockquote:after,q:before,q:after{content:\'\';content:none}table{border-collapse:collapse;border-spacing:0}html,:root{font-size:16px}body{font-size:1em;line-height:1.5em;margin-top:1.5em;margin-bottom:1.5em;max-width:33em;margin-left:auto;margin-right:auto;font-family:Helvetica,Arial,sans-serif;text-align:left;word-spacing:normal;hyphens:auto;orphans:2;widows:2;font-variant-numeric:oldstyle-nums}body 
*{max-width:100%}address,article,aside,audio,canvas,footer,header,ol,ul,dl,pre,section,table,video,img,figure{margin-top:1.5em;margin-bottom:1.5em}p{margin-top:1.5em;margin-bottom:0em}p+p{margin-top:0em;margin-bottom:0em;text-indent:1.5em}h1{font-size:2.25em;line-height:1.33333em;margin-top:0.66667em;margin-bottom:0.66667em}h2{font-size:1.5em;line-height:1em;margin-top:1em;margin-bottom:1em}h3{font-size:1.3125em;line-height:1.14286em;margin-top:1.14286em;margin-bottom:1.14286em}h4{font-size:1.125em;line-height:1.33333em;margin-top:1.33333em;margin-bottom:1.33333em}h1,h2,h3,h4,h5,h6{font-family:Georgia,serif;font-weight:bold;page-break-after:avoid}ul li{list-style-type:disc}ol li{list-style-type:decimal}li{list-style-position:inside;text-indent:1.5em}dt{font-weight:bold;float:left;margin-right:1.5em}tr{page-break-before:avoid;page-break-after:avoid}td,th{outline:0.1em solid #000;padding:0 0.5em;text-align:left}tfoot td{font-style:italic}caption{font-style:italic;text-align:center;font-style:italic}blockquote{margin-top:2.25em;margin-bottom:2.25em;margin-left:2.25em;margin-right:2.25em}blockquote p{margin-top:0em;margin-bottom:0em;text-indent:0}figure{text-align:center}figure img,figure audio,figure canvas,figure video,figure table{margin-top:0;margin-bottom:0}figcaption{font-size:0.875em;line-height:1.71429em;margin-top:0em;margin-bottom:1.71429em;font-style:italic}img{vertical-align:bottom}code,samp,kbd,var{font-family:Consolas,"Liberation Mono",Courier,monospace;font-size:0.875em;font-weight:normal;font-style:normal;text-decoration:none;line-height:0.875em;padding:0 0.3em}mark{background:#ff0;color:#000}code,.code,samp,kbd,var{background-color:#f8f8f8;box-shadow:0 0 0.1em 0.1em #ddd}em{font-style:italic}strong{font-weight:bold}abbr{letter-spacing:0.1em}abbr[title]{border-bottom:1px dotted #000}cite,q{font-style:italic}q{font-style:italic;quotes:"\xe2\x80\x9c" "\xe2\x80\x9d" "\xe2\x80\x98" "\xe2\x80\x99"}q:before{content:open-quote}q:after{content:close-quote}dfn{font-style:italic}sup,sub{font-size:70%;line-height:70%;position:relative}sup{top:-0.5em}sub{top:0.5em}hr{border-bottom:0.0625em solid #000;border-top:0 none;border-left:0 none;border-right:0 none;margin-top:1.4375em;margin-bottom:1.5em}small{font-size:0.875em;line-height:1.71429em;margin-top:1.71429em;margin-bottom:1.71429em}i{font-style:italic}b{font-weight:bold}u{text-decoration:underline}s{text-decoration:line-through}ins{font-weight:bold;text-decoration:underline}del{text-decoration:line-through}.caps,.nums{letter-spacing:0.1em}.caps{font-variant-numeric:lining-nums}.code{overflow:auto;padding:0 1em;background-color:#f8f8f8;box-shadow:0 0 0.1em 0.1em #ddd}.code code,.code samp,.code kbd,.code var{box-shadow:none;padding:0}.chapter{page-break-after:auto;page-break-before:always}.note{text-indent:0;font-size:0.875em;line-height:1.71429em;margin-top:1.71429em;margin-bottom:1.71429em}.verse{font-family:inherit;display:table;width:auto;margin-left:auto;margin-right:auto}.toc{margin:0 auto}.toc td,.toc th{outline:0 none}.toc th{padding:0 0.5em 0 0;text-align:right;font-weight:normal}.toc td:before{content:"\\2022";padding-right:0.5em}.toc td{padding:0;text-align:left;font-style:italic}@page{margin-top:72pt;margin-bottom:72pt}@media print{body{font-size:12pt;line-height:18pt;margin-top:0pt;margin-bottom:0pt;font-family:"Times New 
Roman",Times,serif}p{margin-top:18pt;margin-bottom:0pt}p+p{text-indent:18pt}address,article,aside,audio,canvas,footer,header,ol,ul,dl,pre,section,table,video,img,figure{margin-top:18pt;margin-bottom:18pt}h1{font-size:21pt;line-height:36pt;margin-top:18pt;margin-bottom:18pt}h2{font-size:18pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}h3{font-size:16pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}h4{font-size:14pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}dt{margin-right:18pt}li{text-indent:18pt}blockquote{margin-top:27pt;margin-bottom:27pt;margin-left:27pt;margin-right:27pt}blockquote p{margin-top:0em;margin-bottom:0em;text-indent:0}figcaption{font-size:10pt;line-height:18pt;margin-top:0pt;margin-bottom:18pt}pre{white-space:pre-line}abbr[title]{border-bottom:0 none}small{font-size:10pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}hr{border-bottom:0.08333em solid #000;margin-top:17pt;margin-bottom:18pt}.note{font-size:10pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}}""" # noqa +gutenweb = '''"html,body,div,span,applet,object,iframe,h1,h2,h3,h4,h5,h6,p,blockquote,pre,a,abbr,acronym,address,big,cite,code,del,dfn,em,img,ins,kbd,q,s,samp,small,strike,strong,sub,sup,tt,var,b,u,i,center,dl,dt,dd,ol,ul,li,fieldset,form,label,legend,table,caption,tbody,tfoot,thead,tr,th,td,article,aside,canvas,details,embed,figure,figcaption,footer,header,hgroup,menu,nav,output,ruby,section,summary,time,mark,audio,video{margin:0;padding:0;border:0;font-size:100%;font:inherit;vertical-align:baseline}article,aside,details,figcaption,figure,footer,header,hgroup,menu,nav,section{display:block}body{line-height:1}ol,ul{list-style:none}blockquote,q{quotes:none}blockquote:before,blockquote:after,q:before,q:after{content:\'\';content:none}table{border-collapse:collapse;border-spacing:0}html,:root{font-size:16px}body{font-size:1em;line-height:1.5em;margin-top:1.5em;margin-bottom:1.5em;max-width:33em;margin-left:auto;margin-right:auto;font-family:Helvetica,Arial,sans-serif;text-align:left;word-spacing:normal;hyphens:auto;orphans:2;widows:2;font-variant-numeric:oldstyle-nums}body *{max-width:100%}address,article,aside,audio,canvas,footer,header,ol,ul,dl,pre,section,table,video,img,figure{margin-top:1.5em;margin-bottom:1.5em}p{margin-top:1.5em;margin-bottom:0em}p+p{margin-top:0em;margin-bottom:0em;text-indent:1.5em}h1{font-size:2.25em;line-height:1.33333em;margin-top:0.66667em;margin-bottom:0.66667em}h2{font-size:1.5em;line-height:1em;margin-top:1em;margin-bottom:1em}h3{font-size:1.3125em;line-height:1.14286em;margin-top:1.14286em;margin-bottom:1.14286em}h4{font-size:1.125em;line-height:1.33333em;margin-top:1.33333em;margin-bottom:1.33333em}h1,h2,h3,h4,h5,h6{font-family:Georgia,serif;font-weight:bold;page-break-after:avoid}ul li{list-style-type:disc}ol li{list-style-type:decimal}li{list-style-position:inside;text-indent:1.5em}dt{font-weight:bold;float:left;margin-right:1.5em}tr{page-break-before:avoid;page-break-after:avoid}td,th{outline:0.1em solid #000;padding:0 0.5em;text-align:left}tfoot td{font-style:italic}caption{font-style:italic;text-align:center;font-style:italic}blockquote{margin-top:2.25em;margin-bottom:2.25em;margin-left:2.25em;margin-right:2.25em}blockquote p{margin-top:0em;margin-bottom:0em;text-indent:0}figure{text-align:center}figure img,figure audio,figure canvas,figure video,figure 
table{margin-top:0;margin-bottom:0}figcaption{font-size:0.875em;line-height:1.71429em;margin-top:0em;margin-bottom:1.71429em;font-style:italic}img{vertical-align:bottom}code,samp,kbd,var{font-family:Consolas,"Liberation Mono",Courier,monospace;font-size:0.875em;font-weight:normal;font-style:normal;text-decoration:none;line-height:0.875em;padding:0 0.3em}mark{background:#ff0;color:#000}code,.code,samp,kbd,var{background-color:#f8f8f8;box-shadow:0 0 0.1em 0.1em #ddd}em{font-style:italic}strong{font-weight:bold}abbr{letter-spacing:0.1em}abbr[title]{border-bottom:1px dotted #000}cite,q{font-style:italic}q{font-style:italic;quotes:"\xe2\x80\x9c" "\xe2\x80\x9d" "\xe2\x80\x98" "\xe2\x80\x99"}q:before{content:open-quote}q:after{content:close-quote}dfn{font-style:italic}sup,sub{font-size:70%;line-height:70%;position:relative}sup{top:-0.5em}sub{top:0.5em}hr{border-bottom:0.0625em solid #000;border-top:0 none;border-left:0 none;border-right:0 none;margin-top:1.4375em;margin-bottom:1.5em}small{font-size:0.875em;line-height:1.71429em;margin-top:1.71429em;margin-bottom:1.71429em}i{font-style:italic}b{font-weight:bold}u{text-decoration:underline}s{text-decoration:line-through}ins{font-weight:bold;text-decoration:underline}del{text-decoration:line-through}.caps,.nums{letter-spacing:0.1em}.caps{font-variant-numeric:lining-nums}.code{overflow:auto;padding:0 1em;background-color:#f8f8f8;box-shadow:0 0 0.1em 0.1em #ddd}.code code,.code samp,.code kbd,.code var{box-shadow:none;padding:0}.chapter{page-break-after:auto;page-break-before:always}.note{text-indent:0;font-size:0.875em;line-height:1.71429em;margin-top:1.71429em;margin-bottom:1.71429em}.verse{font-family:inherit;display:table;width:auto;margin-left:auto;margin-right:auto}.toc{margin:0 auto}.toc td,.toc th{outline:0 none}.toc th{padding:0 0.5em 0 0;text-align:right;font-weight:normal}.toc td:before{content:"\\2022";padding-right:0.5em}.toc td{padding:0;text-align:left;font-style:italic}@page{margin-top:72pt;margin-bottom:72pt}@media print{body{font-size:12pt;line-height:18pt;margin-top:0pt;margin-bottom:0pt;font-family:"Times New Roman",Times,serif}p{margin-top:18pt;margin-bottom:0pt}p+p{text-indent:18pt}address,article,aside,audio,canvas,footer,header,ol,ul,dl,pre,section,table,video,img,figure{margin-top:18pt;margin-bottom:18pt}h1{font-size:21pt;line-height:36pt;margin-top:18pt;margin-bottom:18pt}h2{font-size:18pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}h3{font-size:16pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}h4{font-size:14pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}dt{margin-right:18pt}li{text-indent:18pt}blockquote{margin-top:27pt;margin-bottom:27pt;margin-left:27pt;margin-right:27pt}blockquote p{margin-top:0em;margin-bottom:0em;text-indent:0}figcaption{font-size:10pt;line-height:18pt;margin-top:0pt;margin-bottom:18pt}pre{white-space:pre-line}abbr[title]{border-bottom:0 none}small{font-size:10pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}hr{border-bottom:0.08333em solid #000;margin-top:17pt;margin-bottom:18pt}.note{font-size:10pt;line-height:18pt;margin-top:18pt;margin-bottom:18pt}}''' # noqa: E501 class InstapaperRecipe(BasicNewsRecipe): diff --git a/recipes/interfax_ua.recipe b/recipes/interfax_ua.recipe index eae204a661..5aecd757cb 100644 --- a/recipes/interfax_ua.recipe +++ b/recipes/interfax_ua.recipe @@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class InterfaxUAUA(BasicNewsRecipe): title = 
'\u0406\u043D\u0442\u0435\u0440\u0444\u0430\u043A\u0441-\u0423\u043A\u0440\u0430\u0457\u043D\u0430' __author__ = 'bugmen00t' - description = '\u0406\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u044F \u043F\u0440\u043E \u043E\u0441\u0442\u0430\u043D\u043D\u0456 \u043F\u043E\u0434\u0456\u0457 \u0432 \u043F\u043E\u043B\u0456\u0442\u0438\u0446\u0456 \u0423\u043A\u0440\u0430\u0457\u043D\u0438, \u043A\u043B\u044E\u0447\u043E\u0432\u0456 \u0443\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0456 \u0435\u043A\u043E\u043D\u043E\u043C\u0456\u0447\u043D\u0456 \u043D\u043E\u0432\u0438\u043D\u0438 \u0442\u0430 \u043E\u0441\u043D\u043E\u0432\u043D\u0456 \u043F\u043E\u0434\u0456\u0457 \u0432 \u043A\u0440\u0430\u0457\u043D\u0430\u0445 \u0421\u041D\u0414 \u0456 \u0441\u0432\u0456\u0442\u0443.' # noqa - publisher = '\u0406\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E \u00AB\u0406\u043D\u0442\u0435\u0440\u0444\u0430\u043A\u0441-\u0423\u043A\u0440\u0430\u0457\u043D\u0430\u00BB' # noqa + description = '\u0406\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u044F \u043F\u0440\u043E \u043E\u0441\u0442\u0430\u043D\u043D\u0456 \u043F\u043E\u0434\u0456\u0457 \u0432 \u043F\u043E\u043B\u0456\u0442\u0438\u0446\u0456 \u0423\u043A\u0440\u0430\u0457\u043D\u0438, \u043A\u043B\u044E\u0447\u043E\u0432\u0456 \u0443\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0456 \u0435\u043A\u043E\u043D\u043E\u043C\u0456\u0447\u043D\u0456 \u043D\u043E\u0432\u0438\u043D\u0438 \u0442\u0430 \u043E\u0441\u043D\u043E\u0432\u043D\u0456 \u043F\u043E\u0434\u0456\u0457 \u0432 \u043A\u0440\u0430\u0457\u043D\u0430\u0445 \u0421\u041D\u0414 \u0456 \u0441\u0432\u0456\u0442\u0443.' # noqa: E501 + publisher = '\u0406\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E \u00AB\u0406\u043D\u0442\u0435\u0440\u0444\u0430\u043A\u0441-\u0423\u043A\u0440\u0430\u0457\u043D\u0430\u00BB' # noqa: E501 category = 'newspaper' cover_url = u'https://interfax.com.ua/static/articles/images/interfax_ukraine_logo_ukr.svg' language = 'uk' diff --git a/recipes/interfax_uk.recipe b/recipes/interfax_uk.recipe index 1f232dbdef..f22dc0c604 100644 --- a/recipes/interfax_uk.recipe +++ b/recipes/interfax_uk.recipe @@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class InterfaxUARU(BasicNewsRecipe): title = '\u0418\u043D\u0442\u0435\u0440\u0444\u0430\u043A\u0441-\u0423\u043A\u0440\u0430\u0438\u043D\u0430' __author__ = 'bugmen00t' - description = '\u0418\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u044F \u043E \u043F\u043E\u0441\u043B\u0435\u0434\u043D\u0438\u0445 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u0445 \u0432 \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0435 \u0423\u043A\u0440\u0430\u0438\u043D\u044B, \u043A\u043B\u044E\u0447\u0435\u0432\u044B\u0435 \u0443\u043A\u0440\u0430\u0438\u043D\u0441\u043A\u0438\u0435 \u044D\u043A\u043E\u043D\u043E\u043C\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0438 \u043E\u0441\u043D\u043E\u0432\u043D\u044B\u0435 \u0441\u043E\u0431\u044B\u0442\u0438\u044F \u0432 \u0441\u0442\u0440\u0430\u043D\u0430\u0445 \u0421\u041D\u0413 \u0438 \u043C\u0438\u0440\u0430.' 
# noqa - publisher = '\u0418\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u043E\u043D\u043D\u043E\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E \u00AB\u0418\u043D\u0442\u0435\u0440\u0444\u0430\u043A\u0441-\u0423\u043A\u0440\u0430\u0438\u043D\u0430\u00BB' # noqa + description = '\u0418\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u044F \u043E \u043F\u043E\u0441\u043B\u0435\u0434\u043D\u0438\u0445 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u0445 \u0432 \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0435 \u0423\u043A\u0440\u0430\u0438\u043D\u044B, \u043A\u043B\u044E\u0447\u0435\u0432\u044B\u0435 \u0443\u043A\u0440\u0430\u0438\u043D\u0441\u043A\u0438\u0435 \u044D\u043A\u043E\u043D\u043E\u043C\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0438 \u043E\u0441\u043D\u043E\u0432\u043D\u044B\u0435 \u0441\u043E\u0431\u044B\u0442\u0438\u044F \u0432 \u0441\u0442\u0440\u0430\u043D\u0430\u0445 \u0421\u041D\u0413 \u0438 \u043C\u0438\u0440\u0430.' # noqa: E501 + publisher = '\u0418\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u043E\u043D\u043D\u043E\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E \u00AB\u0418\u043D\u0442\u0435\u0440\u0444\u0430\u043A\u0441-\u0423\u043A\u0440\u0430\u0438\u043D\u0430\u00BB' # noqa: E501 category = 'newspaper' cover_url = u'https://interfax.com.ua/static/articles/images/interfax_ukraine_logo_rus.svg' language = 'ru' diff --git a/recipes/internazionale.recipe b/recipes/internazionale.recipe index d2c1a0bbfc..052f3519d7 100644 --- a/recipes/internazionale.recipe +++ b/recipes/internazionale.recipe @@ -12,8 +12,8 @@ class Volkskrant(BasicNewsRecipe): country = 'IT' category = 'news, politics, Italy, world' resolve_internal_links = True - remove_tags_before = { 'name': 'article' } - remove_tags_after = { 'name': 'article' } + remove_tags_before = {'name': 'article'} + remove_tags_after = {'name': 'article'} remove_tags = [ dict( attrs={ @@ -29,13 +29,13 @@ class Volkskrant(BasicNewsRecipe): ), dict(name=['script', 'style']), ] - remove_attributes = ["class", "id", "name", "style"] + remove_attributes = ['class', 'id', 'name', 'style'] encoding = 'utf-8' no_stylesheets = True ignore_duplicate_articles = {'url'} - current_number_url = "https://www.internazionale.it/sommario" - home_url = "https://www.internazionale.it" + current_number_url = 'https://www.internazionale.it/sommario' + home_url = 'https://www.internazionale.it' cover_url = None def extract_article(self, article): @@ -67,8 +67,8 @@ class Volkskrant(BasicNewsRecipe): def parse_index(self): soup = self.index_to_soup(self.current_number_url) - self.cover_url = soup.find('span', { 'class': 'img_expand' })['data-src'] - main_container = soup.find('div', { 'class': 'content_data' }) + self.cover_url = soup.find('span', {'class': 'img_expand'})['data-src'] + main_container = soup.find('div', {'class': 'content_data'}) children = main_container.findAll('div', recursive=False) sections = [] current_section = None diff --git a/recipes/iol_za.recipe b/recipes/iol_za.recipe index cd98f85d63..8ddc433fb9 100644 --- a/recipes/iol_za.recipe +++ b/recipes/iol_za.recipe @@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class IOL_za(BasicNewsRecipe): title = 'IOL News' __author__ = 'Darko Miletic' - description = "South Africa's Premier Online News Source. Discover the world of IOL, News South Africa, Sport, Business, Financial, World News, Entertainment, Technology, Motoring, Travel, Property, Classifieds and more." 
# noqa + description = "South Africa's Premier Online News Source. Discover the world of IOL, News South Africa, Sport, Business, Financial, World News, Entertainment, Technology, Motoring, Travel, Property, Classifieds and more." # noqa: E501 publisher = 'Independent Newspapers (Pty) Limited.' category = 'news, politics, South Africa' oldest_article = 2 @@ -23,10 +23,10 @@ class IOL_za(BasicNewsRecipe): remove_empty_feeds = True publication_type = 'newsportal' masthead_url = 'http://www.iol.co.za/polopoly_fs/iol-news5-1.989381!/image/464471284.png_gen/derivatives/absolute/464471284.png' - extra_css = """ + extra_css = ''' body{font-family: Arial,Helvetica,sans-serif } img{display: block} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/iprofesional.recipe b/recipes/iprofesional.recipe index 27dad0a157..60564f4743 100644 --- a/recipes/iprofesional.recipe +++ b/recipes/iprofesional.recipe @@ -25,7 +25,7 @@ class iProfesional(BasicNewsRecipe): remove_empty_feeds = True publication_type = 'newsportal' masthead_url = 'http://www.iprofesional.com/img/header/logoiprofesional.png' - extra_css = """ + extra_css = ''' body{font-family: "Open Sans", sans-serif} img{margin-bottom: 0.4em; display:block} .tituloprincipal{font-family: WhitneyBold, Arial, sans-serif; @@ -33,18 +33,18 @@ class iProfesional(BasicNewsRecipe): font-size: x-large; display: block; margin-bottom: 1em;} .bajadanh{font-size: small; display: block; margin-bottom: 1em;} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language } keep_only_tags = [ - dict(name='div', attrs={'class': lambda x: x and 'tituloprincipal' in x.split()}) - ,dict(name='div', attrs={'class': lambda x: x and 'bajadanh' in x.split()}) - ,dict(name='div', attrs={'class': lambda x: x and 'datosautornh' in x.split()}) - ,dict(name='div', attrs={'class': lambda x: x and 'fotonotanh' in x.split()}) - ,dict(name='div', attrs={'class': lambda x: x and 'contenidonotanh' in x.split()}) + dict(name='div', attrs={'class': lambda x: x and 'tituloprincipal' in x.split()}), + dict(name='div', attrs={'class': lambda x: x and 'bajadanh' in x.split()}), + dict(name='div', attrs={'class': lambda x: x and 'datosautornh' in x.split()}), + dict(name='div', attrs={'class': lambda x: x and 'fotonotanh' in x.split()}), + dict(name='div', attrs={'class': lambda x: x and 'contenidonotanh' in x.split()}), ] remove_tags = [ dict(name=['meta', 'link', 'base', 'embed', 'object', 'iframe'])] diff --git a/recipes/irish_times.recipe b/recipes/irish_times.recipe index 90cbdd1a87..9f0637435e 100644 --- a/recipes/irish_times.recipe +++ b/recipes/irish_times.recipe @@ -121,7 +121,7 @@ class IrishTimes(BasicNewsRecipe): 'sec-fetch-site': 'same-origin', 'sec-fetch-dest': 'empty', 'sec-fetch-mode': 'cors', - }, data=urlencode({'username': self.username, 'password': self.password, 'deviceid':deviceid, 'persistent':'on', 'rid': ''})) + }, data=urlencode({'username': self.username, 'password': self.password, 'deviceid':deviceid, 'persistent':'on', 'rid': ''})) r = br.open(rq) raw = r.read() diff --git a/recipes/istories.recipe b/recipes/istories.recipe index 4984ff0dd3..10169a6c8a 100644 --- a/recipes/istories.recipe +++ b/recipes/istories.recipe @@ -6,7 +6,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class IStories(BasicNewsRecipe): title = u'\u0412\u0430\u0436\u043D\u044B\u0435 \u0438\u0441\u0442\u043E\u0440\u0438\u0438' - description 
= u'\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u0438\u0437\u0438\u0440\u0443\u044E\u0449\u0435\u0435\u0441\u044F \u043D\u0430 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0441\u043A\u0438\u0445 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F\u0445. \u041E\u0441\u043D\u043E\u0432\u0430\u043D\u043E \u0432 2020 \u0433\u043E\u0434\u0443 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u043C\u0438 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0430\u043C\u0438 \u0420\u043E\u043C\u0430\u043D\u043E\u043C \u0410\u043D\u0438\u043D\u044B\u043C \u0438 \u041E\u043B\u0435\u0441\u0435\u0439 \u0428\u043C\u0430\u0433\u0443\u043D.' # noqa + description = u'\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u0438\u0437\u0438\u0440\u0443\u044E\u0449\u0435\u0435\u0441\u044F \u043D\u0430 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0441\u043A\u0438\u0445 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F\u0445. \u041E\u0441\u043D\u043E\u0432\u0430\u043D\u043E \u0432 2020 \u0433\u043E\u0434\u0443 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u043C\u0438 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0430\u043C\u0438 \u0420\u043E\u043C\u0430\u043D\u043E\u043C \u0410\u043D\u0438\u043D\u044B\u043C \u0438 \u041E\u043B\u0435\u0441\u0435\u0439 \u0428\u043C\u0430\u0433\u0443\u043D.' # noqa: E501 __author__ = 'bugmen00t' publisher = 'Roman Anin & Olesya Shmagun' publication_type = 'blog' diff --git a/recipes/ixbt.recipe b/recipes/ixbt.recipe index 24493cb4c2..3ccc1b3bf1 100644 --- a/recipes/ixbt.recipe +++ b/recipes/ixbt.recipe @@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Ixbt(BasicNewsRecipe): title = 'iXBT.com' __author__ = 'bugmen00t' - description = 'Специализированный российский информационно-аналитический сервер, освещающий вопросы аппаратного обеспечения персональных компьютеров, коммуникаций и серверов, 3D-графики и звука, цифрового фото и видео, Hi-Fi аппаратуры и проекторов, мобильной связи и периферии, игровых приложений и многого другого.' # noqa + description = 'Специализированный российский информационно-аналитический сервер, освещающий вопросы аппаратного обеспечения персональных компьютеров, коммуникаций и серверов, 3D-графики и звука, цифрового фото и видео, Hi-Fi аппаратуры и проекторов, мобильной связи и периферии, игровых приложений и многого другого.' 
# noqa: E501 publisher = 'www.ixbt.com' category = 'news' cover_url = u'https://www.ixbt.com/images/ixbt-logo-new.jpg' diff --git a/recipes/jacobinmag.recipe b/recipes/jacobinmag.recipe index 5804d55fec..2338d62c7c 100644 --- a/recipes/jacobinmag.recipe +++ b/recipes/jacobinmag.recipe @@ -35,11 +35,11 @@ class Jacobinmag(BasicNewsRecipe): issue_url = None PREFIX = 'https://www.jacobinmag.com' LOGIN = 'https://auth.jacobinmag.com/mini_profile?redirect=https%3A%2F%2Fwww.jacobinmag.com%2F' - extra_css = """ + extra_css = ''' body{font-family: Antwerp, 'Times New Roman', Times, serif} img{margin-top:1em; margin-bottom: 1em; display:block} .entry-dek,.entry-author{font-family: Hurme-No3, Futura, sans-serif} - """ + ''' conversion_options = { 'comment': description, diff --git a/recipes/japan_times.recipe b/recipes/japan_times.recipe index eb1db6493c..fad4f6eb2b 100644 --- a/recipes/japan_times.recipe +++ b/recipes/japan_times.recipe @@ -1,36 +1,36 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -__license__ = "GPL v3" +__license__ = 'GPL v3' __copyright__ = ( - "2008-2013, Darko Miletic . " - "2022, Albert Aparicio Isarn " + '2008-2013, Darko Miletic . ' + '2022, Albert Aparicio Isarn ' ) -""" +''' japantimes.co.jp -""" +''' from calibre.web.feeds.news import BasicNewsRecipe class JapanTimes(BasicNewsRecipe): - title = "The Japan Times" - __author__ = "Albert Aparicio Isarn (original recipe by Darko Miletic)" + title = 'The Japan Times' + __author__ = 'Albert Aparicio Isarn (original recipe by Darko Miletic)' description = ( "The latest news from Japan Times, Japan's leading English-language daily newspaper" ) - language = "en_JP" - category = "news, politics, japan" - publisher = "The Japan Times" + language = 'en_JP' + category = 'news, politics, japan' + publisher = 'The Japan Times' oldest_article = 2 max_articles_per_feed = 150 no_stylesheets = True remove_javascript = True use_embedded_content = False - encoding = "utf8" - publication_type = "newspaper" - masthead_url = "https://cdn-japantimes.com/wp-content/themes/jt_theme/library/img/japantimes-logo-tagline.png" - extra_css = "body{font-family: Geneva,Arial,Helvetica,sans-serif}" + encoding = 'utf8' + publication_type = 'newspaper' + masthead_url = 'https://cdn-japantimes.com/wp-content/themes/jt_theme/library/img/japantimes-logo-tagline.png' + extra_css = 'body{font-family: Geneva,Arial,Helvetica,sans-serif}' recipe_specific_options = { 'days': { @@ -47,37 +47,37 @@ class JapanTimes(BasicNewsRecipe): self.oldest_article = float(d) conversion_options = { - "comment": description, - "tags": category, - "publisher": publisher, - "language": language, + 'comment': description, + 'tags': category, + 'publisher': publisher, + 'language': language, } - remove_tags_before = {"name": "h1"} - remove_tags_after = {"name": "ul", "attrs": {"class": "single-sns-area"}} + remove_tags_before = {'name': 'h1'} + remove_tags_after = {'name': 'ul', 'attrs': {'class': 'single-sns-area'}} keep_only_tags = [ - {"name": "div", "attrs": {"class": "padding_block"}}, + {'name': 'div', 'attrs': {'class': 'padding_block'}}, # {"name": "h5", "attrs": {"class": "writer", "role": "author"}}, # {"name": "p", "attrs": {"class": "credit"}}, ] remove_tags = [ - {"name": "div", "id": "no_js_blocker", "attrs": {"class": "padding_block"}}, - {"name": "div", "attrs": {"class": "single-upper-meta"}}, - {"name": "ul", "attrs": {"class": "single-sns-area"}}, + {'name': 'div', 'id': 'no_js_blocker', 'attrs': {'class': 'padding_block'}}, + {'name': 'div', 'attrs': {'class': 
'single-upper-meta'}}, + {'name': 'ul', 'attrs': {'class': 'single-sns-area'}}, ] feeds = [ - (u"Top Stories", u"https://www.japantimes.co.jp/feed/topstories/"), - (u"News", u"https://www.japantimes.co.jp/news/feed/"), - (u"Opinion", u"https://www.japantimes.co.jp/opinion/feed/"), - (u"Life", u"https://www.japantimes.co.jp/life/feed/"), - (u"Community", u"https://www.japantimes.co.jp/community/feed/"), - (u"Culture", u"https://www.japantimes.co.jp/culture/feed/"), - (u"Sports", u"https://www.japantimes.co.jp/sports/feed/"), + (u'Top Stories', u'https://www.japantimes.co.jp/feed/topstories/'), + (u'News', u'https://www.japantimes.co.jp/news/feed/'), + (u'Opinion', u'https://www.japantimes.co.jp/opinion/feed/'), + (u'Life', u'https://www.japantimes.co.jp/life/feed/'), + (u'Community', u'https://www.japantimes.co.jp/community/feed/'), + (u'Culture', u'https://www.japantimes.co.jp/culture/feed/'), + (u'Sports', u'https://www.japantimes.co.jp/sports/feed/'), ] def get_article_url(self, article): rurl = BasicNewsRecipe.get_article_url(self, article) - return rurl.partition("?")[0] + return rurl.partition('?')[0] def preprocess_raw_html(self, raw, url): - return "" + raw[raw.find("") :] + return '' + raw[raw.find('') :] diff --git a/recipes/javalobby.recipe b/recipes/javalobby.recipe index dfd5396dd9..9d884cd5a6 100644 --- a/recipes/javalobby.recipe +++ b/recipes/javalobby.recipe @@ -19,10 +19,10 @@ class Engadget(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False - remove_tags = [dict(name='div', attrs={'class': ["fivestar-static-form-item", "relatedContent", "pagination clearfix", "addResources"]}), - dict(name='div', attrs={'id': ["comments"]})] + remove_tags = [dict(name='div', attrs={'class': ['fivestar-static-form-item', 'relatedContent', 'pagination clearfix', 'addResources']}), + dict(name='div', attrs={'id': ['comments']})] - keep_only_tags = [dict(name='div', attrs={'id': ["article"]})] + keep_only_tags = [dict(name='div', attrs={'id': ['article']})] feeds = [(u'news', u'http://feeds.dzone.com/javalobby/frontpage')] diff --git a/recipes/jijinews.recipe b/recipes/jijinews.recipe index bd2a471e42..02728f2f6d 100644 --- a/recipes/jijinews.recipe +++ b/recipes/jijinews.recipe @@ -23,8 +23,8 @@ class JijiDotCom(BasicNewsRecipe): feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')] - remove_tags_before = dict(id="article-area") - remove_tags_after = dict(id="ad_google") + remove_tags_before = dict(id='article-area') + remove_tags_after = dict(id='ad_google') def get_cover_url(self): cover_url = 'http://www.jiji.com/img/top_header_logo2.gif' diff --git a/recipes/joop.recipe b/recipes/joop.recipe index 3464c4a548..16d5092798 100644 --- a/recipes/joop.recipe +++ b/recipes/joop.recipe @@ -36,7 +36,7 @@ class JoopRecipe(BasicNewsRecipe): keep_only_tags.append( dict(name='h2', attrs={'class': 'columnhead smallline'})) keep_only_tags.append( - dict(name='div', attrs={'class': re.compile('article.*')})) + dict(name='div', attrs={'class': re.compile(r'article.*')})) extra_css = ''' body {font-family: verdana, arial, helvetica, geneva, sans-serif;} diff --git a/recipes/jot_down.recipe b/recipes/jot_down.recipe index b1b9deccd7..b078edd90c 100644 --- a/recipes/jot_down.recipe +++ b/recipes/jot_down.recipe @@ -47,7 +47,7 @@ class jotdown(BasicNewsRecipe): dict(name='div', attrs={'id':'respond'}) ] - remove_tags_after = dict(name='div' , attrs={'id':'respond'}) + remove_tags_after = dict(name='div', attrs={'id':'respond'}) preprocess_regexps = [ # To change the 
small size of the text diff --git a/recipes/jpost.recipe b/recipes/jpost.recipe index 2c94daead2..ef8281a4cb 100644 --- a/recipes/jpost.recipe +++ b/recipes/jpost.recipe @@ -25,7 +25,6 @@ class JerusalemPost(BasicNewsRecipe): __author__ = 'Kovid Goyal' max_articles_per_feed = 10 - no_stylesheets = True def get_browser(self): return BasicNewsRecipe.get_browser(self, user_agent='common_words/based') diff --git a/recipes/juventudrebelde.recipe b/recipes/juventudrebelde.recipe index 9dd33892d1..d75d8da8d9 100644 --- a/recipes/juventudrebelde.recipe +++ b/recipes/juventudrebelde.recipe @@ -21,7 +21,7 @@ class JuventudRebelde(BasicNewsRecipe): category = 'Noticias' language = 'es' publication_type = 'Periodico' - extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .title{font-weight: bold} .read{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa + extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .title{font-weight: bold} .read{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa: E501 preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] conversion_options = { 'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True diff --git a/recipes/kirkusreviews.recipe b/recipes/kirkusreviews.recipe index 99de429ee3..c3437f2fd7 100644 --- a/recipes/kirkusreviews.recipe +++ b/recipes/kirkusreviews.recipe @@ -4,20 +4,20 @@ from calibre.web.feeds.news import BasicNewsRecipe class KirkusReviews(BasicNewsRecipe): - title = "Kirkus Reviews" - description = ("Kirkus Reviews is an American book review magazine founded in 1933 by Virginia Kirkus." - " The magazine is headquartered in New York City. Released twice monthly on the 1st/15th.") - language = "en" - __author__ = "ping" - publication_type = "magazine" + title = 'Kirkus Reviews' + description = ('Kirkus Reviews is an American book review magazine founded in 1933 by Virginia Kirkus.' + ' The magazine is headquartered in New York City. 
Released twice monthly on the 1st/15th.') + language = 'en' + __author__ = 'ping' + publication_type = 'magazine' masthead_url = ( - "https://d1fd687oe6a92y.cloudfront.net/img/kir_images/logo/kirkus-nav-logo.svg" + 'https://d1fd687oe6a92y.cloudfront.net/img/kir_images/logo/kirkus-nav-logo.svg' ) - encoding = "utf-8" + encoding = 'utf-8' remove_javascript = True no_stylesheets = True auto_cleanup = False - ignore_duplicate_articles = {"url"} + ignore_duplicate_articles = {'url'} compress_news_images = True compress_news_images_auto_size = 6 max_articles_per_feed = 99 @@ -25,105 +25,105 @@ class KirkusReviews(BasicNewsRecipe): keep_only_tags = [ dict( class_=[ - "article-author", - "article-author-img-start", - "article-author-description-start", - "single-review", + 'article-author', + 'article-author-img-start', + 'article-author-description-start', + 'single-review', ] ) ] remove_tags = [ dict( class_=[ - "sidebar-content", - "article-social-share-desktop-first", - "article-social-share-desktop-pagination", - "article-social-share-mobile", - "share-review-text", - "like-dislike-article", - "rate-this-book-text", - "input-group", - "user-comments", - "show-all-response-text", - "button-row", - "hide-on-mobile", - "related-article", - "breadcrumb-row", - "shop-now-dropdown", + 'sidebar-content', + 'article-social-share-desktop-first', + 'article-social-share-desktop-pagination', + 'article-social-share-mobile', + 'share-review-text', + 'like-dislike-article', + 'rate-this-book-text', + 'input-group', + 'user-comments', + 'show-all-response-text', + 'button-row', + 'hide-on-mobile', + 'related-article', + 'breadcrumb-row', + 'shop-now-dropdown', ] ) ] - remove_tags_after = [dict(class_="single-review")] + remove_tags_after = [dict(class_='single-review')] - extra_css = """ + extra_css = ''' .image-container img { max-width: 100%; height: auto; margin-bottom: 0.2rem; } .photo-caption { font-size: 0.8rem; margin-bottom: 0.5rem; display: block; } .book-review-img .image-container { text-align: center; } .book-rating-module .description-title { font-size: 1.25rem; margin-left: 0; text-align: center; } - """ + ''' def preprocess_html(self, soup): - h1 = soup.find(class_="article-title") - book_cover = soup.find("ul", class_="book-review-img") + h1 = soup.find(class_='article-title') + book_cover = soup.find('ul', class_='book-review-img') if book_cover: - for li in book_cover.find_all("li"): - li.name = "div" - book_cover.name = "div" + for li in book_cover.find_all('li'): + li.name = 'div' + book_cover.name = 'div' if h1: book_cover.insert_before(h1.extract()) return soup def parse_index(self): - issue_url = "https://www.kirkusreviews.com/magazine/current/" + issue_url = 'https://www.kirkusreviews.com/magazine/current/' soup = self.index_to_soup(issue_url) - issue = soup.find(name="article", class_="issue-container") - cover_img = issue.select(".issue-header .cover-image img") + issue = soup.find(name='article', class_='issue-container') + cover_img = issue.select('.issue-header .cover-image img') if cover_img: - self.cover_url = cover_img[0]["src"] + self.cover_url = cover_img[0]['src'] - h1 = issue.find("h1") + h1 = issue.find('h1') if h1: - self.timefmt = f" [{self.tag_to_string(h1)}]" # edition + self.timefmt = f' [{self.tag_to_string(h1)}]' # edition articles = {} - for book_ele in soup.find_all(name="div", class_="issue-featured-book"): - link = book_ele.find("a") + for book_ele in soup.find_all(name='div', class_='issue-featured-book'): + link = book_ele.find('a') if not link: continue - 
section = self.tag_to_string(book_ele.find("h3")).upper() + section = self.tag_to_string(book_ele.find('h3')).upper() articles.setdefault(section, []).append( - {"url": urljoin(issue_url, link["href"]), "title": link["title"]} + {'url': urljoin(issue_url, link['href']), 'title': link['title']} ) - for post_ele in issue.select("div.issue-more-posts ul li div.lead-text"): - link = post_ele.find("a") + for post_ele in issue.select('div.issue-more-posts ul li div.lead-text'): + link = post_ele.find('a') if not link: continue - section = self.tag_to_string(post_ele.find(class_="lead-text-type")).upper() + section = self.tag_to_string(post_ele.find(class_='lead-text-type')).upper() articles.setdefault(section, []).append( { - "url": urljoin(issue_url, link["href"]), - "title": self.tag_to_string(link), + 'url': urljoin(issue_url, link['href']), + 'title': self.tag_to_string(link), } ) - for section_ele in issue.select("section.reviews-section"): + for section_ele in issue.select('section.reviews-section'): section_articles = [] - for review in section_ele.select("ul li.starred"): - link = review.select("h4 a") + for review in section_ele.select('ul li.starred'): + link = review.select('h4 a') if not link: continue - description = review.find("p") + description = review.find('p') section_articles.append( { - "url": urljoin(issue_url, link[0]["href"]), - "title": self.tag_to_string(link[0]), - "description": "" + 'url': urljoin(issue_url, link[0]['href']), + 'title': self.tag_to_string(link[0]), + 'description': '' if not description else self.tag_to_string(description), } ) if not section_articles: continue - section = self.tag_to_string(section_ele.find("h3")).upper() + section = self.tag_to_string(section_ele.find('h3')).upper() if section not in articles: articles[section] = [] articles.setdefault(section, []).extend(section_articles) diff --git a/recipes/knife_media.recipe b/recipes/knife_media.recipe index 4e728e2936..60ce16cf2a 100644 --- a/recipes/knife_media.recipe +++ b/recipes/knife_media.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class KnifeMedia(BasicNewsRecipe): title = '\u041D\u043E\u0436' __author__ = 'bugmen00t' - description = '\u0418\u043D\u0442\u0435\u043B\u043B\u0435\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u044B\u0439 \u0436\u0443\u0440\u043D\u0430\u043B \u043E \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u0435 \u0438 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435' # noqa + description = '\u0418\u043D\u0442\u0435\u043B\u043B\u0435\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u044B\u0439 \u0436\u0443\u0440\u043D\u0430\u043B \u043E \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u0435 \u0438 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435' # noqa: E501 publisher = '\u041C\u0438\u0445\u0430\u0438\u043B \u0426\u044B\u0433\u0430\u043D, \u0422\u0430\u0442\u044C\u044F\u043D\u0430 \u041A\u043E\u044D\u043D' category = 'blog' cover_url = u'https://knife.media/feature/pdd/img/knife_logo.33a98aee.svg' diff --git a/recipes/kompiutierra.recipe b/recipes/kompiutierra.recipe index d34a60b365..6e2064ddb1 100644 --- a/recipes/kompiutierra.recipe +++ b/recipes/kompiutierra.recipe @@ -26,7 +26,7 @@ class Computerra(BasicNewsRecipe): remove_tags_after = dict(name='article') - remove_tags = [ + remove_tags = [ dict(name='ul', attrs={'class': 'breadcrumbs'}), dict(name='div', attrs={'class': 'post-info__likes post-info-likes'}), dict(name='div', attrs={'class': 'cta-row'}), diff --git a/recipes/konflikty_zbrojne.recipe b/recipes/konflikty_zbrojne.recipe index 
5e27d99c74..3c1c41c841 100644 --- a/recipes/konflikty_zbrojne.recipe +++ b/recipes/konflikty_zbrojne.recipe @@ -7,7 +7,7 @@ class Konflikty(BasicNewsRecipe): __author__ = 'fenuks' cover_url = 'http://konflikty.pl/wp-content/uploads/2014/07/konflikty.pl-logo.png' language = 'pl' - description = u'Zbiór ciekawych artykułów historycznych, militarnych oraz recenzji książek, gier i filmów. Najświeższe informacje o lotnictwie, wojskach lądowych i polityce.' # noqa + description = u'Zbiór ciekawych artykułów historycznych, militarnych oraz recenzji książek, gier i filmów. Najświeższe informacje o lotnictwie, wojskach lądowych i polityce.' # noqa: E501 category = 'military, history' oldest_article = 7 max_articles_per_feed = 100 @@ -17,7 +17,7 @@ class Konflikty(BasicNewsRecipe): dict(name='div', attrs={'id': 'page-content'})] remove_tags = [dict(name='aside'), dict(name='p', attrs={'id': 'breadcrumbs'}), dict(name='div', attrs={'class': [ - 'tags clearfix', 'post-tags clearfix', 'post-nav section-container clearfix', 'about-author section-container clearfix vcard', 'post-related section-container clearfix', ]}), dict(name='div', attrs={'id': 'disqus_thread'})] # noqa + 'tags clearfix', 'post-tags clearfix', 'post-nav section-container clearfix', 'about-author section-container clearfix vcard', 'post-related section-container clearfix', ]}), dict(name='div', attrs={'id': 'disqus_thread'})] # noqa: E501 feeds = [(u'Konflikty Zbrojne', u'http://www.konflikty.pl/feed')] diff --git a/recipes/kopalniawiedzy.recipe b/recipes/kopalniawiedzy.recipe index c9189ca643..5b67df334b 100644 --- a/recipes/kopalniawiedzy.recipe +++ b/recipes/kopalniawiedzy.recipe @@ -25,13 +25,13 @@ class KopalniaWiedzy(BasicNewsRecipe): {'name': 'div', 'attrs': {'class': 'article-time-and-cat'}}, {'name': 'p', 'attrs': {'class': 'tags'}}] remove_tags_after = dict(attrs={'class': 'ad-square'}) keep_only_tags = [ - dict(name="div", attrs={'class': 'article-text text-small'})] + dict(name='div', attrs={'class': 'article-text text-small'})] extra_css = '.topimage {margin-top: 30px}' preprocess_regexps = [ - (re.compile(u'
'), + (re.compile(r''), lambda match: ''), - (re.compile(u'
'), + (re.compile(r'
'), lambda match: '
') ] diff --git a/recipes/korben.recipe b/recipes/korben.recipe index 30a8b12cca..aa1b0f3492 100644 --- a/recipes/korben.recipe +++ b/recipes/korben.recipe @@ -17,6 +17,6 @@ class BasicUserRecipe1318619728(BasicNewsRecipe): try: br.open(masthead) except: - self.log("\nCover unavailable") + self.log('\nCover unavailable') masthead = None return masthead diff --git a/recipes/kosmonauta_pl.recipe b/recipes/kosmonauta_pl.recipe index 8cc7258c1b..a262723abb 100644 --- a/recipes/kosmonauta_pl.recipe +++ b/recipes/kosmonauta_pl.recipe @@ -10,11 +10,9 @@ class Kosmonauta(BasicNewsRecipe): category = 'astronomy' language = 'pl' cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg' - no_stylesheets = True oldest_article = 7 no_stylesheets = True remove_javascript = True remove_attributes = ['style'] max_articles_per_feed = 100 feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/feed')] - diff --git a/recipes/kudy_z_nudy.recipe b/recipes/kudy_z_nudy.recipe index 5a5b320e3a..29aa54e224 100644 --- a/recipes/kudy_z_nudy.recipe +++ b/recipes/kudy_z_nudy.recipe @@ -22,15 +22,15 @@ class kudyznudyRecipe(BasicNewsRecipe): cover_url = 'http://www.kudyznudy.cz/App_Themes/KzN/Images/Containers/Header/HeaderLogoKZN.png' remove_javascript = True no_stylesheets = True - extra_css = """ - """ + extra_css = ''' + ''' remove_attributes = [] remove_tags_before = dict( name='div', attrs={'class': ['C_WholeContentPadding']}) remove_tags_after = dict( name='div', attrs={'class': ['SurroundingsContainer']}) - remove_tags = [dict(name='div', attrs={ + remove_tags = [dict(name='div', attrs={ 'class': ['Details', 'buttons', 'SurroundingsContainer', 'breadcrumb']})] keep_only_tags = [] diff --git a/recipes/kurier.recipe b/recipes/kurier.recipe index d9e7ea0d67..d51cda698f 100644 --- a/recipes/kurier.recipe +++ b/recipes/kurier.recipe @@ -44,16 +44,16 @@ class Kurier(BasicNewsRecipe): ] keep_only_tags = [ - dict(name='article', attrs={'class': re.compile('main-article')}) + dict(name='article', attrs={'class': re.compile(r'main-article')}) ] remove_tags = [ dict(name='div', attrs={'class': 'social-media-container'}), dict(name='section', attrs={'class': 'tags'}), - dict(name='section', attrs={'class': re.compile('comment-box')}), - dict(name='section', attrs={'class': re.compile('related-content')}), - dict(name='section', attrs={'class': re.compile('article-slider')}), - dict(name='section', attrs={'class': re.compile('commentcontainer')}), + dict(name='section', attrs={'class': re.compile(r'comment-box')}), + dict(name='section', attrs={'class': re.compile(r'related-content')}), + dict(name='section', attrs={'class': re.compile(r'article-slider')}), + dict(name='section', attrs={'class': re.compile(r'commentcontainer')}), dict(name='blockquote') ] diff --git a/recipes/kyivpost_en.recipe b/recipes/kyivpost_en.recipe index 344ac169a0..cf0c97d463 100644 --- a/recipes/kyivpost_en.recipe +++ b/recipes/kyivpost_en.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class KyivPost(BasicNewsRecipe): title = 'Kyiv Post' __author__ = 'bugmen00t' - description = 'The Kyiv Post is Ukraine\u2019s English-language newspaper. The newspaper\u2019s first print edition came out on Oct. 18, 1995, and went online in 1997. The newspaper\u2019s motto is \u201CUkraine\u2019s Global Voice,\u201D which in 2018 replaced the previous motto of \u201CIndependence. Community. Trust.\u201D Both slogans reflect the newspaper\u2019s commitment to the highest journalistic and ethical standards.' 
# noqa + description = 'The Kyiv Post is Ukraine\u2019s English-language newspaper. The newspaper\u2019s first print edition came out on Oct. 18, 1995, and went online in 1997. The newspaper\u2019s motto is \u201CUkraine\u2019s Global Voice,\u201D which in 2018 replaced the previous motto of \u201CIndependence. Community. Trust.\u201D Both slogans reflect the newspaper\u2019s commitment to the highest journalistic and ethical standards.' # noqa: E501 publisher = 'BIZNESGRUPP TOV' category = 'newspaper' cover_url = u'https://www.kyivpost.com/wp-content/themes/kyivpost/assets/img/svg/logo-foot.svg' diff --git a/recipes/kyivpost_ua.recipe b/recipes/kyivpost_ua.recipe index e4a05c6ecb..4c0ec6ada1 100644 --- a/recipes/kyivpost_ua.recipe +++ b/recipes/kyivpost_ua.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class KyivPost(BasicNewsRecipe): title = 'Kyiv Post' __author__ = 'bugmen00t' - description = '\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430 \u0430\u043D\u0433\u043B\u043E\u043C\u043E\u0432\u043D\u0430 \u0433\u0430\u0437\u0435\u0442\u0430, \u043D\u0430\u0433\u043E\u0440\u043E\u0434\u0436\u0435\u043D\u0430 \u0443 2014 \u0440\u043E\u043A\u0443. \u041F\u0435\u0440\u0448\u0438\u0439 \u0434\u0440\u0443\u043A\u043E\u0432\u0430\u043D\u0438\u0439 \u043F\u0440\u0438\u043C\u0456\u0440\u043D\u0438\u043A \u0433\u0430\u0437\u0435\u0442\u0438 \u0432\u0438\u0439\u0448\u043E\u0432 18 \u0436\u043E\u0432\u0442\u043D\u044F 1995 \u0440\u043E\u043A\u0443, \u0430 \u0432 \u0406\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0432\u0430\u0440\u0456\u0430\u043D\u0442\u0456 Kyiv Post \u0437\u2019\u044F\u0432\u0438\u043B\u0430\u0441\u044C \u0432 1997 \u0440\u043E\u0446\u0456. \u0414\u0435\u0432\u0456\u0437 \u0433\u0430\u0437\u0435\u0442\u0438: \u00AB\u0413\u043B\u043E\u0431\u0430\u043B\u044C\u043D\u0438\u0439 \u0433\u043E\u043B\u043E\u0441 \u0423\u043A\u0440\u0430\u0457\u043D\u0438\u00BB, \u044F\u043A\u0438\u0439 \u0443 2018 \u0440\u043E\u0446\u0456 \u0437\u0430\u043C\u0456\u043D\u0438\u0432 \u043F\u043E\u043F\u0435\u0440\u0435\u0434\u043D\u0456\u0439 \u0434\u0435\u0432\u0456\u0437 \u00AB\u041D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0456\u0441\u0442\u044C. \u0421\u043F\u0456\u043B\u044C\u043D\u043E\u0442\u0430. \u0414\u043E\u0432\u0456\u0440\u0430.\u00BB \u041E\u0431\u0438\u0434\u0432\u0430 \u0433\u0430\u0441\u043B\u0430 \u0432\u0456\u0434\u043E\u0431\u0440\u0430\u0436\u0430\u044E\u0442\u044C \u0442\u0435, \u0449\u043E \u0443 \u0432\u0438\u0434\u0430\u043D\u043D\u0456 \u0434\u043E\u0442\u0440\u0438\u043C\u0443\u044E\u0442\u044C\u0441\u044F \u043D\u0430\u0439\u0432\u0438\u0449\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0441\u044C\u043A\u0438\u0445 \u0442\u0430 \u0435\u0442\u0438\u0447\u043D\u0438\u0445 \u0441\u0442\u0430\u043D\u0434\u0430\u0440\u0442\u0456\u0432.' # noqa + description = '\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430 \u0430\u043D\u0433\u043B\u043E\u043C\u043E\u0432\u043D\u0430 \u0433\u0430\u0437\u0435\u0442\u0430, \u043D\u0430\u0433\u043E\u0440\u043E\u0434\u0436\u0435\u043D\u0430 \u0443 2014 \u0440\u043E\u043A\u0443. 
\u041F\u0435\u0440\u0448\u0438\u0439 \u0434\u0440\u0443\u043A\u043E\u0432\u0430\u043D\u0438\u0439 \u043F\u0440\u0438\u043C\u0456\u0440\u043D\u0438\u043A \u0433\u0430\u0437\u0435\u0442\u0438 \u0432\u0438\u0439\u0448\u043E\u0432 18 \u0436\u043E\u0432\u0442\u043D\u044F 1995 \u0440\u043E\u043A\u0443, \u0430 \u0432 \u0406\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0432\u0430\u0440\u0456\u0430\u043D\u0442\u0456 Kyiv Post \u0437\u2019\u044F\u0432\u0438\u043B\u0430\u0441\u044C \u0432 1997 \u0440\u043E\u0446\u0456. \u0414\u0435\u0432\u0456\u0437 \u0433\u0430\u0437\u0435\u0442\u0438: \u00AB\u0413\u043B\u043E\u0431\u0430\u043B\u044C\u043D\u0438\u0439 \u0433\u043E\u043B\u043E\u0441 \u0423\u043A\u0440\u0430\u0457\u043D\u0438\u00BB, \u044F\u043A\u0438\u0439 \u0443 2018 \u0440\u043E\u0446\u0456 \u0437\u0430\u043C\u0456\u043D\u0438\u0432 \u043F\u043E\u043F\u0435\u0440\u0435\u0434\u043D\u0456\u0439 \u0434\u0435\u0432\u0456\u0437 \u00AB\u041D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0456\u0441\u0442\u044C. \u0421\u043F\u0456\u043B\u044C\u043D\u043E\u0442\u0430. \u0414\u043E\u0432\u0456\u0440\u0430.\u00BB \u041E\u0431\u0438\u0434\u0432\u0430 \u0433\u0430\u0441\u043B\u0430 \u0432\u0456\u0434\u043E\u0431\u0440\u0430\u0436\u0430\u044E\u0442\u044C \u0442\u0435, \u0449\u043E \u0443 \u0432\u0438\u0434\u0430\u043D\u043D\u0456 \u0434\u043E\u0442\u0440\u0438\u043C\u0443\u044E\u0442\u044C\u0441\u044F \u043D\u0430\u0439\u0432\u0438\u0449\u0438\u0445 \u0436\u0443\u0440\u043D\u0430\u043B\u0456\u0441\u0442\u0441\u044C\u043A\u0438\u0445 \u0442\u0430 \u0435\u0442\u0438\u0447\u043D\u0438\u0445 \u0441\u0442\u0430\u043D\u0434\u0430\u0440\u0442\u0456\u0432.' # noqa: E501 publisher = 'BIZNESGRUPP TOV' category = 'newspaper' cover_url = u'https://www.kyivpost.com/wp-content/themes/kyivpost/assets/img/svg/logo-foot.svg' diff --git a/recipes/kyungyhang.recipe b/recipes/kyungyhang.recipe index c755d50c7d..31b1ba6379 100644 --- a/recipes/kyungyhang.recipe +++ b/recipes/kyungyhang.recipe @@ -21,7 +21,7 @@ class Kyungyhang(BasicNewsRecipe): remove_javascript = True preprocess_regexps = [ - (re.compile("
", + (re.compile(r"
", re.DOTALL | re.IGNORECASE), lambda match: ''), ] diff --git a/recipes/la_jornada.recipe b/recipes/la_jornada.recipe index cbf806c3ef..355d6a6017 100644 --- a/recipes/la_jornada.recipe +++ b/recipes/la_jornada.recipe @@ -33,10 +33,10 @@ class LaJornada_mx(BasicNewsRecipe): use_embedded_content = False language = 'es_MX' remove_empty_feeds = True - cover_url = strftime("http://www.jornada.com.mx/%Y/%m/%d/portada.pdf") + cover_url = strftime('http://www.jornada.com.mx/%Y/%m/%d/portada.pdf') masthead_url = 'http://www.jornada.com.mx/v7.0/imagenes/la-jornada-trans.png' publication_type = 'newspaper' - extra_css = """ + extra_css = ''' body{font-family: "Times New Roman",serif } .cabeza{font-size: xx-large; font-weight: bold } .documentFirstHeading{font-size: xx-large; font-weight: bold } @@ -54,7 +54,7 @@ class LaJornada_mx(BasicNewsRecipe): .text{margin-top: 1.4em} p.inicial{display: inline; font-size: xx-large; font-weight: bold} p.s-s{display: inline; text-indent: 0} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language @@ -111,7 +111,7 @@ class LaJornada_mx(BasicNewsRecipe): def get_article_url(self, article): # Get link to original article URL - rurl = article.get('guid', None) + rurl = article.get('guid', None) if not rurl: # Use the "link" attribute as failover return article.get('link', None) diff --git a/recipes/la_nacion_cr.recipe b/recipes/la_nacion_cr.recipe index 5462e2fb74..a8b776fb10 100644 --- a/recipes/la_nacion_cr.recipe +++ b/recipes/la_nacion_cr.recipe @@ -5,7 +5,7 @@ class crnews(BasicNewsRecipe): __author__ = 'Douglas Delgado' title = u'La Nacion Costa Rica' publisher = 'GRUPO NACION GN, S. A.' - description = 'Diario de circulacion nacional de Costa Rica. Recipe creado por Douglas Delgado (doudelgado@gmail.com) para su uso con Calibre por Kovid Goyal' # noqa + description = 'Diario de circulacion nacional de Costa Rica. 
Recipe creado por Douglas Delgado (doudelgado@gmail.com) para su uso con Calibre por Kovid Goyal' # noqa: E501 category = 'Spanish, Entertainment' masthead_url = 'http://www.nacion.com/App_Themes/nacioncom/Images/logo_nacioncom.png' diff --git a/recipes/la_nueva.recipe b/recipes/la_nueva.recipe index 43ffa73516..5b9887d4f0 100644 --- a/recipes/la_nueva.recipe +++ b/recipes/la_nueva.recipe @@ -36,14 +36,14 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe): name='div', attrs={'id': ['noticia_texto']}) ] - extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:150%; font-weight: 600; text-align: justify; } h2{ font-family: sans-serif; font-size:120%; font-weight: 500; text-align: justify } ' # noqa + extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:150%; font-weight: 600; text-align: justify; } h2{ font-family: sans-serif; font-size:120%; font-weight: 500; text-align: justify } ' # noqa: E501 remove_tags_before = dict(name='div', attrs={'class': ['contenedor']}) remove_tags_after = dict(name='div', attrs={'class': ['fin_noticia']}) remove_tags = [ dict(name='div', attrs={'class': ['epigrafe', 'antetitulo', 'bloqueclear', 'bloqueclear_video', 'cuadro_multimedia', 'cintillo2', - 'editor_documentos', 'noticiadd', 'noticiadd3', 'noticiainterior', 'fin_noticia']}), dict(name='div', attrs={'id': ['evotos']}) # noqa + 'editor_documentos', 'noticiadd', 'noticiadd3', 'noticiainterior', 'fin_noticia']}), dict(name='div', attrs={'id': ['evotos']}) # noqa: E501 ] feeds = [ diff --git a/recipes/la_republica.recipe b/recipes/la_republica.recipe index d0ca63bd8d..9efeaeacbd 100644 --- a/recipes/la_republica.recipe +++ b/recipes/la_republica.recipe @@ -15,9 +15,9 @@ from calibre.web.feeds.news import BasicNewsRecipe class LaRepubblica(BasicNewsRecipe): title = 'La Repubblica' __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic, faber1971' - description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.' # noqa + description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.' 
# noqa: E501 masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png' - publisher = 'Gruppo editoriale L\'Espresso' + publisher = "Gruppo editoriale L'Espresso" category = 'News, politics, culture, economy, general interest' language = 'it' timefmt = '[%a, %d %b, %Y]' @@ -28,9 +28,9 @@ class LaRepubblica(BasicNewsRecipe): publication_type = 'newspaper' articles_are_obfuscated = True temp_files = [] - extra_css = """ + extra_css = ''' img{display: block} - """ + ''' remove_attributes = ['width', 'height', 'lang', 'xmlns:og', 'xmlns:fb'] @@ -50,7 +50,7 @@ class LaRepubblica(BasicNewsRecipe): html = response.read() count = 10 except: - print("Retrying download...") + print('Retrying download...') count += 1 self.temp_files.append(PersistentTemporaryFile('_fa.html')) self.temp_files[-1].write(html) diff --git a/recipes/la_stampa.recipe b/recipes/la_stampa.recipe index 820016c60e..9970590d5a 100644 --- a/recipes/la_stampa.recipe +++ b/recipes/la_stampa.recipe @@ -26,7 +26,7 @@ class LaStampa(BasicNewsRecipe): conversion_options = {'linearize_tables': True} remove_attributes = ['colspan'] - extra_css = ' .boxocchiello2{font-size: small} .catenaccio{font-style: italic} .titoloRub{font-size: xx-large; font-weight: bold } .titologir{font-size: xx-large; font-weight: bold } .sezione{font-weight: bold} ' # noqa + extra_css = ' .boxocchiello2{font-size: small} .catenaccio{font-style: italic} .titoloRub{font-size: xx-large; font-weight: bold } .titologir{font-size: xx-large; font-weight: bold } .sezione{font-weight: bold} ' # noqa: E501 def get_article_url(self, article): link = article.get('links') @@ -34,7 +34,7 @@ class LaStampa(BasicNewsRecipe): return link[0]['href'] keep_only_tags = [ - dict(attrs={'class': ['boxocchiello2', 'titoloRub', 'titologir', 'autore-girata', 'luogo-girata', 'catenaccio', 'sezione', 'articologirata', 'bodytext', 'news-single-img', 'ls-articoloCorpo', 'ls-blog-list-1col']}), # noqa + dict(attrs={'class': ['boxocchiello2', 'titoloRub', 'titologir', 'autore-girata', 'luogo-girata', 'catenaccio', 'sezione', 'articologirata', 'bodytext', 'news-single-img', 'ls-articoloCorpo', 'ls-blog-list-1col']}), # noqa: E501 dict(name='div', attrs={'id': 'corpoarticolo'}) ] diff --git a/recipes/lalibre_be.recipe b/recipes/lalibre_be.recipe index cef8318a0e..100c32712a 100644 --- a/recipes/lalibre_be.recipe +++ b/recipes/lalibre_be.recipe @@ -32,7 +32,7 @@ class LaLibre(BasicNewsRecipe): feeds = [ - (u'L\'actu', u'http://www.lalibre.be/rss/?section=10'), + (u"L'actu", u'http://www.lalibre.be/rss/?section=10'), (u'Culture', u'http://www.lalibre.be/rss/?section=5'), (u'Economie', u'http://www.lalibre.be/rss/?section=3'), (u'Libre Entreprise', u'http://www.lalibre.be/rss/?section=904'), diff --git a/recipes/lanacion.recipe b/recipes/lanacion.recipe index 15f35c7ca2..326b9a575f 100644 --- a/recipes/lanacion.recipe +++ b/recipes/lanacion.recipe @@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Lanacion(BasicNewsRecipe): title = 'La Nacion' __author__ = 'Darko Miletic' - description = "lanacion.com - Informacion actualizada las 24 horas, con noticias de Argentina y del mundo" + description = 'lanacion.com - Informacion actualizada las 24 horas, con noticias de Argentina y del mundo' publisher = 'La Nacion S.A.' 
category = 'news, politics, Argentina' oldest_article = 1 @@ -22,13 +22,13 @@ class Lanacion(BasicNewsRecipe): publication_type = 'newspaper' remove_empty_feeds = True masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln-home.gif' - extra_css = """ + extra_css = ''' h1{font-family: TheSans,Arial,sans-serif} body{font-family: Arial,sans-serif} img{display: block} .firma,.fecha{font-size: small} .epigrafe-columna{font-size: x-small} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language @@ -36,7 +36,7 @@ class Lanacion(BasicNewsRecipe): remove_tags = [ dict(name=['iframe', 'embed', 'object', 'meta', 'link', 'button']), dict(attrs={'id': [ - 'herramientas-sociales', 'comunidad', 'ranking-nota', 'footer']}), dict(attrs={'class': ['mas-sobre-tema', 'cinturon-notas floatFix', 'cinturon-notas']}) # noqa + 'herramientas-sociales', 'comunidad', 'ranking-nota', 'footer']}), dict(attrs={'class': ['mas-sobre-tema', 'cinturon-notas floatFix', 'cinturon-notas']}) # noqa: E501 ] remove_tags_before = dict(attrs={'id': 'nota'}) diff --git a/recipes/lapoliticaonline_ar.recipe b/recipes/lapoliticaonline_ar.recipe index 7134b6dcf6..aa1e78310d 100644 --- a/recipes/lapoliticaonline_ar.recipe +++ b/recipes/lapoliticaonline_ar.recipe @@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class LaPoliticaOnline_AR(BasicNewsRecipe): title = 'La Politica Online' __author__ = 'Darko Miletic' - description = "Informacion actualizada las 24 horas, con noticias de Argentina y del mundo" + description = 'Informacion actualizada las 24 horas, con noticias de Argentina y del mundo' publisher = 'La Politica Online SA' category = 'news, politics, Argentina' oldest_article = 1 @@ -22,13 +22,13 @@ class LaPoliticaOnline_AR(BasicNewsRecipe): publication_type = 'newspaper' remove_empty_feeds = True masthead_url = 'http://www.lapoliticaonline.com/0/img/header/logo.gif' - extra_css = """ + extra_css = ''' .title,.vsmcontent{font-family: Georgia,"Times New Roman",Times,serif} body{font-family: Arial,Helvetica,sans-serif} .galleryfooter{font-size: small; color: gainsboro;} img{display: block} .title{font-size: x-large; font-weight: bold; line-height: 2em;} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/laprensa.recipe b/recipes/laprensa.recipe index 820edf8abe..bf04fa9dfa 100644 --- a/recipes/laprensa.recipe +++ b/recipes/laprensa.recipe @@ -35,33 +35,33 @@ class LaPrensa(BasicNewsRecipe): filter_regexps = [r'.*archive.aspx.*'] remove_tags = [ - dict(name='td', attrs={'class': ["link-registro", "link-buscador"]}), + dict(name='td', attrs={'class': ['link-registro', 'link-buscador']}), dict(name='td', attrs={ - 'id': ["TDTabItem1", "TDTabItem2", "TDTabItem3", "TDTabItem4"]}), - dict(name='table', attrs={'class': ["marco-botonera"]}), - dict(name='tr', attrs={'class': ["messages", "IUTabItemSelected"]}), - dict(name='input', attrs={'id': "txt_allfields"}), + 'id': ['TDTabItem1', 'TDTabItem2', 'TDTabItem3', 'TDTabItem4']}), + dict(name='table', attrs={'class': ['marco-botonera']}), + dict(name='tr', attrs={'class': ['messages', 'IUTabItemSelected']}), + dict(name='input', attrs={'id': 'txt_allfields'}), dict(name='div', attrs={ - 'id': ["TabItem1", "TabItem2", "TabItem3", "TabItem4", "RCPanel"]}), - dict(name='span', attrs={'id': ["GWCNavigatorControl", "_ctl15"]}), - dict(name='span', attrs={'class': ["ranking-titulo", "IUTab"]}), - 
dict(name='a', attrs={'class': ["link-registro", ]}), - dict(name='img', src="/versions/1/imgs/icono-comentario.gif"), - dict(name='img', src="/versions/1/imgs/logo.gif"), - dict(name='img', src="/versions/1/imgs/boton-ingresar-roll.gif"), - dict(name='img', src="/versions/1/imgs/icono-recomendar.gif"), + 'id': ['TabItem1', 'TabItem2', 'TabItem3', 'TabItem4', 'RCPanel']}), + dict(name='span', attrs={'id': ['GWCNavigatorControl', '_ctl15']}), + dict(name='span', attrs={'class': ['ranking-titulo', 'IUTab']}), + dict(name='a', attrs={'class': ['link-registro', ]}), + dict(name='img', src='/versions/1/imgs/icono-comentario.gif'), + dict(name='img', src='/versions/1/imgs/logo.gif'), + dict(name='img', src='/versions/1/imgs/boton-ingresar-roll.gif'), + dict(name='img', src='/versions/1/imgs/icono-recomendar.gif'), dict(name='button'), - dict(name='img', src="/versions/1/imgs/boton-votar-roll.gif"), - dict(name='img', src="/versions/1/imgs/boton-ingresar.gif"), - dict(name='img', src="/versions/1/imgs/icono-imprimir.gif"), - dict(name='img', src="/versions/1/imgs/icono-ampliar-letra.gif"), - dict(name='img', src="/versions/1/imgs/icono-reducir-letra.gif"), - dict(name='img', src="/versions/1/imgs/pix-trans.gif"), - dict(name='img', src="/versions/1/imgs/icono-buscador.gif"), - dict(name='img', src="/versions/1/imgs/separador-linea-azul.gif"), - dict(name='img', src=" /versions/1/imgs/separador-linea.gif"), - dict(name='a', text="Powered by Civinext Groupware - V. 2.0.3567.23706"), - dict(name='img', height="0") + dict(name='img', src='/versions/1/imgs/boton-votar-roll.gif'), + dict(name='img', src='/versions/1/imgs/boton-ingresar.gif'), + dict(name='img', src='/versions/1/imgs/icono-imprimir.gif'), + dict(name='img', src='/versions/1/imgs/icono-ampliar-letra.gif'), + dict(name='img', src='/versions/1/imgs/icono-reducir-letra.gif'), + dict(name='img', src='/versions/1/imgs/pix-trans.gif'), + dict(name='img', src='/versions/1/imgs/icono-buscador.gif'), + dict(name='img', src='/versions/1/imgs/separador-linea-azul.gif'), + dict(name='img', src=' /versions/1/imgs/separador-linea.gif'), + dict(name='a', text='Powered by Civinext Groupware - V. 
2.0.3567.23706'), + dict(name='img', height='0') ] extra_css = ''' @@ -95,8 +95,8 @@ class LaPrensa(BasicNewsRecipe): soup.head.insert(0, mtag) for item in soup.findAll(style=True): del item['style'] - for item in soup.findAll(align="center"): + for item in soup.findAll(align='center'): del item['align'] - for item in soup.findAll(bgcolor="ffffff"): + for item in soup.findAll(bgcolor='ffffff'): del item['bgcolor'] return soup diff --git a/recipes/latimes.recipe b/recipes/latimes.recipe index dc30505a29..b3e2362071 100644 --- a/recipes/latimes.recipe +++ b/recipes/latimes.recipe @@ -46,7 +46,7 @@ def what_section(url): class LATimes(BasicNewsRecipe): title = 'Los Angeles Times' __author__ = 'Jose Ortiz' - description = 'The Los Angeles Times is a leading source of news on Southern California, entertainment, movies, television, music, politics, business, health, technology, travel, sports, environment, economics, autos, jobs, real estate and other topics affecting California' # noqa + description = 'The Los Angeles Times is a leading source of news on Southern California, entertainment, movies, television, music, politics, business, health, technology, travel, sports, environment, economics, autos, jobs, real estate and other topics affecting California' # noqa: E501 category = 'news, politics, USA, Los Angeles, world' oldest_article = 1 max_articles_per_feed = 200 diff --git a/recipes/le_canard_enchaine.recipe b/recipes/le_canard_enchaine.recipe index 2da45db09f..ff8779ce54 100644 --- a/recipes/le_canard_enchaine.recipe +++ b/recipes/le_canard_enchaine.recipe @@ -48,7 +48,7 @@ class LeCanardEnchaine(BasicNewsRecipe): ''' def get_cover_url(self): - """Récupère dynamiquement l'URL de la dernière une""" + '''Récupère dynamiquement l'URL de la dernière une''' br = self.get_browser() try: soup = self.index_to_soup(br.open('https://boutique.lecanardenchaine.fr/acheter-au-numero/').read()) @@ -61,7 +61,7 @@ class LeCanardEnchaine(BasicNewsRecipe): elif img and img.get('src'): return 'https://boutique.lecanardenchaine.fr' + img['src'] - self.log.info('Aucune couverture trouvée, utilisation de l\'image par défaut') + self.log.info("Aucune couverture trouvée, utilisation de l'image par défaut") return 'https://image.ausha.co/2x1H3rkhwjmSwAa8KzIFfcN0G9GxfJWY83UafXn8_400x400.jpeg' except Exception: self.log.exception('Erreur lors de la récupération de la couverture') @@ -90,7 +90,7 @@ class LeCanardEnchaine(BasicNewsRecipe): feeds = [] for section_title, section_url in self.SECTIONS.items(): - print(f"Exploration de la rubrique : {section_title}") + print(f'Exploration de la rubrique : {section_title}') articles = [] try: url = 'https://www.lecanardenchaine.fr' + section_url @@ -119,10 +119,10 @@ class LeCanardEnchaine(BasicNewsRecipe): if unique_articles: feeds.append((section_title, unique_articles)) - print(f" {len(unique_articles)} articles trouvés") + print(f' {len(unique_articles)} articles trouvés') except Exception as e: - print(f"Erreur sur {section_title}: {str(e)}") + print(f'Erreur sur {section_title}: {e}') return feeds diff --git a/recipes/le_gorafi.recipe b/recipes/le_gorafi.recipe index ee3adcecce..494fba7c0c 100644 --- a/recipes/le_gorafi.recipe +++ b/recipes/le_gorafi.recipe @@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class LeGorafi(BasicNewsRecipe): title = u'Le GORAFI.fr' __author__ = 'Malah, LAntoine' - description = u'Depuis 1826, toute l\'information de sources contradictoires' + description = u"Depuis 1826, toute l'information de sources contradictoires" 
oldest_article = 7 language = 'fr' max_articles_per_feed = 100 @@ -54,7 +54,7 @@ class LeGorafi(BasicNewsRecipe): soup = self.index_to_soup(article.url) img = soup.select_one('#mvp-post-feat-img img') return img['data-lazy-src'] - print("No cover found") + print('No cover found') return None def parse_feeds(self): diff --git a/recipes/le_monde.recipe b/recipes/le_monde.recipe index 42f4e8774e..ab4e9852c4 100644 --- a/recipes/le_monde.recipe +++ b/recipes/le_monde.recipe @@ -27,7 +27,6 @@ class LeMonde(BasicNewsRecipe): oldest_article = 1 no_stylesheets = True - remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} reverse_article_order = True remove_empty_feeds = True @@ -178,10 +177,10 @@ class LeMonde(BasicNewsRecipe): # soup = self.index_to_soup('https://www.lemonde.fr/') # a = soup.find('a', {'id': 'jelec_link', 'style': True}) # if a and a['style']: - # url = a['style'].split('/') - # if len(url) > 5 and url[3].isdigit(): - # overwrite guess if actual cover ID was found - # cover_id = url[3] + # url = a['style'].split('/') + # if len(url) > 5 and url[3].isdigit(): + # overwrite guess if actual cover ID was found + # cover_id = url[3] return 'https://www.lemonde.fr/thumbnail/journal/' + cover_id + '/1000/1490' def get_article_url(self, article): diff --git a/recipes/le_monde_diplomatique_fr.recipe b/recipes/le_monde_diplomatique_fr.recipe index 556af30b61..a07e9f78bf 100644 --- a/recipes/le_monde_diplomatique_fr.recipe +++ b/recipes/le_monde_diplomatique_fr.recipe @@ -23,7 +23,7 @@ def absurl(url): class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe): title = u'Le Monde diplomatique.fr' __author__ = 'Gaëtan Lehmann' - description = "Le Monde diplomatique est un mensuel français d’information et d’opinion à la ligne éditoriale nettement engagée en faveur d'une gauche de rupture avec le capitalisme. Il aborde de nombreux sujets — géopolitique, relations internationales, économie, questions sociales, écologie, culture, médias, …" # noqa + description = "Le Monde diplomatique est un mensuel français d’information et d’opinion à la ligne éditoriale nettement engagée en faveur d'une gauche de rupture avec le capitalisme. 
Il aborde de nombreux sujets — géopolitique, relations internationales, économie, questions sociales, écologie, culture, médias, …" # noqa: E501 oldest_article = 30 max_articles_per_feed = 100 auto_cleanup = True @@ -104,7 +104,7 @@ class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe): 'url': absurl(feed_link['href']), 'description': description }) - return [("La valise diplomatique", articles)] + return [('La valise diplomatique', articles)] def parse_index_cartes(self): articles = [] @@ -125,7 +125,7 @@ class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe): 'url': absurl(feed_link['href']), 'description': author }) - return [("Cartes", articles)] + return [('Cartes', articles)] def parse_feeds(self): feeds = BasicNewsRecipe.parse_feeds(self) diff --git a/recipes/le_monde_sub_paper.recipe b/recipes/le_monde_sub_paper.recipe index 0b6a7de449..698f36e33f 100644 --- a/recipes/le_monde_sub_paper.recipe +++ b/recipes/le_monde_sub_paper.recipe @@ -51,7 +51,7 @@ class LeMondeAbonne(BasicNewsRecipe): zipurl_format = 'http://medias.lemonde.fr/abonnes/editionelectronique/%Y%m%d/html/%y%m%d.zip' coverurl_format = '/img/%y%m%d01.jpg' masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/5/54/Le_monde_logo.svg/800px-Le_monde_logo.svg.png' - path_format = "%y%m%d" + path_format = '%y%m%d' keep_only_tags = [ dict(name=['h1']), @@ -66,7 +66,7 @@ class LeMondeAbonne(BasicNewsRecipe): dict(name='div', attrs={'class': 'po-copy'}) ] - article_id_pattern = re.compile("[0-9]+\\.html") + article_id_pattern = re.compile(r'[0-9]+\.html') article_url_format = 'http://www.lemonde.fr/journalelectronique/donnees/protege/%Y%m%d/html/' def get_browser(self): @@ -92,7 +92,7 @@ class LeMondeAbonne(BasicNewsRecipe): for i in range(7): self.ltime = time.gmtime(second) - self.timefmt = time.strftime(" %A %d %B %Y", + self.timefmt = time.strftime(' %A %d %B %Y', self.ltime).decode(preferred_encoding) url = time.strftime(self.zipurl_format, self.ltime) try: @@ -113,7 +113,7 @@ class LeMondeAbonne(BasicNewsRecipe): zfile.close() path = os.path.join( - self.output_dir, time.strftime(self.path_format, self.ltime), "data" + self.output_dir, time.strftime(self.path_format, self.ltime), 'data' ) self.articles_path = path @@ -121,7 +121,7 @@ class LeMondeAbonne(BasicNewsRecipe): files = os.listdir(path) nb_index_files = len([ - name for name in files if re.match("frame_gauche_[0-9]+.html", name) + name for name in files if re.match(r'frame_gauche_[0-9]+.html', name) ]) flux = [] @@ -129,39 +129,39 @@ class LeMondeAbonne(BasicNewsRecipe): article_url = time.strftime(self.article_url_format, self.ltime) for i in range(nb_index_files): - filename = os.path.join(path, "selection_%d.html" % (i + 1)) + filename = os.path.join(path, 'selection_%d.html' % (i + 1)) with open(filename, 'rb') as tmp: soup = self.index_to_soup(tmp.read()) title = soup.find('span').contents[0] - if title == "Une": - title = "À la une" - if title == "Evenement": + if title == 'Une': + title = 'À la une' + if title == 'Evenement': title = "L'événement" - if title == "Planete": - title = "Planète" - if title == "Economie - Entreprises": - title = "Économie" + if title == 'Planete': + title = 'Planète' + if title == 'Economie - Entreprises': + title = 'Économie' if title == "L'Oeil du Monde": title = "L'œil du Monde" - if title == "Enquete": - title = "Enquête" - if title == "Editorial - Analyses": - title = "Horizons" - if title == "Le Monde Economie": - title = "Économie" - if title == "Lettre et chronique": - title = "Idées" - if title == "Le Monde Géo et 
politique": - title = "Géopolitique" - if title == "Météo - Jeux - Ecrans": - title = "Économie & Entreprise" + if title == 'Enquete': + title = 'Enquête' + if title == 'Editorial - Analyses': + title = 'Horizons' + if title == 'Le Monde Economie': + title = 'Économie' + if title == 'Lettre et chronique': + title = 'Idées' + if title == 'Le Monde Géo et politique': + title = 'Géopolitique' + if title == 'Météo - Jeux - Ecrans': + title = 'Économie & Entreprise' tmp.close() - filename = os.path.join(path, "frame_gauche_%d.html" % (i + 1)) + filename = os.path.join(path, 'frame_gauche_%d.html' % (i + 1)) with open(filename, 'rb') as tmp: soup = self.index_to_soup(tmp.read()) articles = [] - for link in soup.findAll("a"): + for link in soup.findAll('a'): article_file = link['href'] article_id = self.article_id_pattern.search(article_file).group() article = { diff --git a/recipes/le_peuple_breton.recipe b/recipes/le_peuple_breton.recipe index cad05cc0c5..ce6e126c26 100644 --- a/recipes/le_peuple_breton.recipe +++ b/recipes/le_peuple_breton.recipe @@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class LePeupleBreton(BasicNewsRecipe): title = 'Le Peuple Breton' __author__ = 'Lionel Plais' - description = u'Aujourd\'hui, être libre c\'est être informé' + description = u"Aujourd'hui, être libre c'est être informé" oldest_article = 90 language = 'fr' cover_img_url = 'http://lepeuplebreton.bzh/wp-content/uploads/2017/11/le-peuple-breton-logo.jpg' diff --git a/recipes/leggo_it.recipe b/recipes/leggo_it.recipe index 2ad79b1db8..3a5393c192 100644 --- a/recipes/leggo_it.recipe +++ b/recipes/leggo_it.recipe @@ -16,7 +16,7 @@ class LeggoIT(BasicNewsRecipe): __author__ = 'Gabriele Marini' description = 'Italian Free daily newspaper' -# cover_url = 'http://www.leggo.it/img/logo-leggo2.gif' + # cover_url = 'http://www.leggo.it/img/logo-leggo2.gif' title = u'Leggo.it' publisher = 'Ced Caltagirone Editore S.p.A.' category = 'News, politics, culture, economy, general interest' @@ -53,8 +53,8 @@ class LeggoIT(BasicNewsRecipe): cover = None st = time.localtime() year = str(st.tm_year) - month = "%.2d" % st.tm_mon - day = "%.2d" % st.tm_mday + month = '%.2d' % st.tm_mon + day = '%.2d' % st.tm_mday cover = 'http://www.leggo.it/' + year + month + day + '/jpeg/LEGGO_ROMA_1.jpg' br = BasicNewsRecipe.get_browser(self) try: @@ -65,6 +65,6 @@ class LeggoIT(BasicNewsRecipe): try: br.open(cover) except: - self.log("\nCover unavailable") + self.log('\nCover unavailable') cover = 'http://www.leggo.it/img/logo-leggo2.gif' return cover diff --git a/recipes/lemonde_dip.recipe b/recipes/lemonde_dip.recipe index 8bc0eb4d81..bc2f6614d4 100644 --- a/recipes/lemonde_dip.recipe +++ b/recipes/lemonde_dip.recipe @@ -15,7 +15,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class LeMondeDiplomatiqueEn(BasicNewsRecipe): title = 'Le Monde diplomatique - English edition' __author__ = 'Darko Miletic' - description = "Le Monde diplomatique is the place you go when you want to know what's really happening. This is a major international paper that is truly independent, that sees the world in fresh ways, that focuses on places no other publications reach. We offer a clear, considered view of the conflicting interests and complexities of a modern global world. LMD in English is a concise version of the Paris-based parent edition, publishing all the major stories each month, expertly translated, and with some London-based commissions too. 
We offer a taster of LMD quality on our website where a selection of articles are available each month." # noqa + description = "Le Monde diplomatique is the place you go when you want to know what's really happening. This is a major international paper that is truly independent, that sees the world in fresh ways, that focuses on places no other publications reach. We offer a clear, considered view of the conflicting interests and complexities of a modern global world. LMD in English is a concise version of the Paris-based parent edition, publishing all the major stories each month, expertly translated, and with some London-based commissions too. We offer a taster of LMD quality on our website where a selection of articles are available each month." # noqa: E501 publisher = 'Le Monde diplomatique' category = 'news, politics, world' no_stylesheets = True @@ -30,13 +30,13 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe): INDEX = PREFIX + strftime('%Y/%m/') use_embedded_content = False language = 'en' - extra_css = """ + extra_css = ''' body{font-family: "Luxi sans","Lucida sans","Lucida Grande",Lucida,"Lucida Sans Unicode",sans-serif} .surtitre{font-size: 1.2em; font-variant: small-caps; margin-bottom: 0.5em} .chapo{font-size: 1.2em; font-weight: bold; margin: 1em 0 0.5em} .texte{font-family: Georgia,"Times New Roman",serif} h1{color: #990000} .notes{border-top: 1px solid #CCCCCC; font-size: 0.9em; line-height: 1.4em} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/lepoint.recipe b/recipes/lepoint.recipe index 47a59d7994..db8bfe6165 100644 --- a/recipes/lepoint.recipe +++ b/recipes/lepoint.recipe @@ -71,6 +71,6 @@ class lepoint(BasicNewsRecipe): try: br.open(masthead) except: - self.log("\nCover unavailable") + self.log('\nCover unavailable') masthead = None return masthead diff --git a/recipes/lex_fridman_podcast.recipe b/recipes/lex_fridman_podcast.recipe index 8dfb25a5c2..fee8031d49 100644 --- a/recipes/lex_fridman_podcast.recipe +++ b/recipes/lex_fridman_podcast.recipe @@ -10,7 +10,7 @@ class lexfridman(BasicNewsRecipe): 'philosophy and the nature of intelligence, consciousness, love, and power. Lex is an AI ' 'researcher at MIT and beyond. Download monthly.' 
) - oldest_article = 30 # days + oldest_article = 30 # days __author__ = 'unkn0wn' language = 'en' encoding = 'utf-8' diff --git a/recipes/lexpress.recipe b/recipes/lexpress.recipe index 773c22818f..a8ba0a30b0 100644 --- a/recipes/lexpress.recipe +++ b/recipes/lexpress.recipe @@ -15,7 +15,7 @@ def classes(classes): class lepoint(BasicNewsRecipe): - title = 'L\'express' + title = "L'express" __author__ = 'calibre' description = 'Actualités' publisher = 'LExpress.fr' @@ -73,6 +73,6 @@ class lepoint(BasicNewsRecipe): try: br.open(masthead) except: - self.log("\nCover unavailable") + self.log('\nCover unavailable') masthead = None return masthead diff --git a/recipes/liberation.recipe b/recipes/liberation.recipe index a81bd5a93a..9015eb0866 100644 --- a/recipes/liberation.recipe +++ b/recipes/liberation.recipe @@ -21,12 +21,14 @@ def resize(x): if '_750' in k: return v + m_fr = { 1: 'Janvier', 2: 'Février', 3: 'Mars', 4: 'Avril', 5: 'Mai', 6: 'Juin', 7: 'Juillet', 8: 'Août', 9: 'Septembre', 10: 'Octobre', 11: 'Novembre', 12: 'Décembre' } + def json_to_html(raw): data = json.loads(raw) @@ -78,9 +80,9 @@ class Liberation(BasicNewsRecipe): title = 'Libération' __author__ = 'unkn0wn' description = ( - 'Libération est un quotidien d\'information libre, vigilant et engagé. L\'objectif de Libération est de ' + "Libération est un quotidien d'information libre, vigilant et engagé. L'objectif de Libération est de " 'fournir une information complète et vérifiée, dans tous les domaines. Sans préjugés, ni complaisance, ' - 'ses enquêtes reportages et analyses s\'emploient à comprendre et à décrire l\'actualité et à révéler ' + "ses enquêtes reportages et analyses s'emploient à comprendre et à décrire l'actualité et à révéler " 'les mutations des sociétés et des cultures.' 
) language = 'fr' @@ -136,9 +138,9 @@ class Liberation(BasicNewsRecipe): b64 = base64.b64decode(self.key) query = { 'website': 'liberation', - 'website_url': '{}'.format(slug), + 'website_url': str(slug), 'published': 'true', - '_sourceInclude': '_id,content_restrictions.content_code,credits,promo_items.basic.caption,promo_items.basic.credits,promo_items.basic.url,promo_items.basic.height,promo_items.basic.width,promo_items.basic.resized_image_urls,promo_items.basic.last_updated_date,promo_items.lead_art.caption,promo_items.lead_art.credits,promo_items.lead_art.url,promo_items.lead_art.height,promo_items.lead_art.width,promo_items.lead_art.resized_image_urls,promo_items.lead_art.last_updated_date,source.additional_properties.legacy_url,content_elements,source.source_id,taxonomy.primary_section.additional_properties.original._admin.alias_ids,taxonomy.primary_section.additional_properties.original.navigation.nav_title,taxonomy.primary_section._id,taxonomy.primary_section.name,taxonomy.primary_section.path,taxonomy.tags,label,subheadlines.basic,headlines.basic,source.additional_properties.legacy_url,source.source_type,first_publish_date,display_date,canonical_url' # noqa + '_sourceInclude': '_id,content_restrictions.content_code,credits,promo_items.basic.caption,promo_items.basic.credits,promo_items.basic.url,promo_items.basic.height,promo_items.basic.width,promo_items.basic.resized_image_urls,promo_items.basic.last_updated_date,promo_items.lead_art.caption,promo_items.lead_art.credits,promo_items.lead_art.url,promo_items.lead_art.height,promo_items.lead_art.width,promo_items.lead_art.resized_image_urls,promo_items.lead_art.last_updated_date,source.additional_properties.legacy_url,content_elements,source.source_id,taxonomy.primary_section.additional_properties.original._admin.alias_ids,taxonomy.primary_section.additional_properties.original.navigation.nav_title,taxonomy.primary_section._id,taxonomy.primary_section.name,taxonomy.primary_section.path,taxonomy.tags,label,subheadlines.basic,headlines.basic,source.additional_properties.legacy_url,source.source_type,first_publish_date,display_date,canonical_url' # noqa: E501 } headers = { 'cache-control': 'public, max-age=5', @@ -148,7 +150,7 @@ class Liberation(BasicNewsRecipe): } api = 'https://arc.api.liberation.fr/content/v4/?' 
+ urlencode(query, safe='()!', quote_via=quote) rq = Request( - url= api, + url=api, headers=headers ) raw = br.open(rq).read() diff --git a/recipes/libertad_digital.recipe b/recipes/libertad_digital.recipe index 55a90adba3..834166e00c 100644 --- a/recipes/libertad_digital.recipe +++ b/recipes/libertad_digital.recipe @@ -22,10 +22,10 @@ class LibertadDigital(BasicNewsRecipe): remove_empty_feeds = True publication_type = 'website' masthead_url = 'http://s.libertaddigital.com/images/logo.gif' - extra_css = """ + extra_css = ''' body{font-family: Verdana,sans-serif } img{margin-bottom: 0.4em; display:block} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language @@ -50,7 +50,7 @@ class LibertadDigital(BasicNewsRecipe): ] def get_article_url(self, article): - return article.get('guid', None) + return article.get('guid', None) def print_version(self, url): art, sep, rest = url.rpartition('/') diff --git a/recipes/liberty_times.recipe b/recipes/liberty_times.recipe index e079ea0468..d83ab440d2 100644 --- a/recipes/liberty_times.recipe +++ b/recipes/liberty_times.recipe @@ -36,6 +36,6 @@ class AdvancedUserRecipe1277443634(BasicNewsRecipe): no_stylesheets = True encoding = 'utf-8' conversion_options = {'linearize_tables': True} - masthead_url = 'https://cache.ltn.com.tw/images/rwd_ltnlogo.png', + masthead_url = 'https://cache.ltn.com.tw/images/rwd_ltnlogo.png' cover_url = 'https://cache.ltn.com.tw/images/rwd_ltnlogo.png' auto_cleanup = True diff --git a/recipes/list_apart.recipe b/recipes/list_apart.recipe index 52011b68cc..466182d63f 100644 --- a/recipes/list_apart.recipe +++ b/recipes/list_apart.recipe @@ -14,7 +14,7 @@ class AListApart (BasicNewsRecipe): __license__ = 'GPL v3' __copyright__ = '2012, Marc Busqué ' title = u'A List Apart' - description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices. This recipe retrieve articles and columns.' # noqa + description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices. This recipe retrieve articles and columns.' 
# noqa: E501 language = 'en' tags = 'web development, software' oldest_article = 120 diff --git a/recipes/live_law.recipe b/recipes/live_law.recipe index 4d6a1e3c5a..c05ebaebec 100644 --- a/recipes/live_law.recipe +++ b/recipes/live_law.recipe @@ -6,6 +6,7 @@ def absurl(url): if url.startswith('/'): return 'https://www.livelaw.in' + url + class livelaw(BasicNewsRecipe): title = 'Live Law' __author__ = 'unkn0wn' diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe index 8cdbbf8bec..90c8e5a05b 100644 --- a/recipes/livemint.recipe +++ b/recipes/livemint.recipe @@ -42,7 +42,7 @@ class LiveMint(BasicNewsRecipe): if 'MINT_FRONT_1' in x['src']: return 'https://epaper.livemint.com' + x['src'].replace('-S', '') - extra_css = """ + extra_css = ''' img {margin:0 auto;} .psTopLogoItem img, .ecologoStory { width:100; } #img-cap {font-size:small; text-align:center;} @@ -51,7 +51,7 @@ class LiveMint(BasicNewsRecipe): } em, blockquote {color:#202020;} .moreAbout, .articleInfo, .metaData, .psTopicsHeading, .topicsTag, .auth {font-size:small;} - """ + ''' keep_only_tags = [ dict( diff --git a/recipes/livescience.recipe b/recipes/livescience.recipe index 2cc26e321e..5506d15fc7 100644 --- a/recipes/livescience.recipe +++ b/recipes/livescience.recipe @@ -5,8 +5,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class LiveScience(BasicNewsRecipe): - title = "Live Science" - description = "For the science geek in everyone! Stories on the latest findings from science journals and institutions. Sourced from livescience.com" + title = 'Live Science' + description = 'For the science geek in everyone! Stories on the latest findings from science journals and institutions. Sourced from livescience.com' __author__ = 'yodha8' language = 'en' oldest_article = 7 diff --git a/recipes/lrb.recipe b/recipes/lrb.recipe index 93c0d927a2..37b1150608 100644 --- a/recipes/lrb.recipe +++ b/recipes/lrb.recipe @@ -12,14 +12,14 @@ def classes(classes): def absolutize(href): if href.startswith('/'): - href = 'https://www.lrb.co.uk' + href + href = 'https://www.lrb.co.uk' + href return href class LondonReviewOfBooksPayed(BasicNewsRecipe): title = 'London Review of Books' __author__ = 'Kovid Goyal' - description = 'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers' # noqa + description = 'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers' # noqa: E501 category = 'news, literature, UK' publisher = 'LRB Ltd.' 
language = 'en_GB' diff --git a/recipes/lupa.recipe b/recipes/lupa.recipe index 02097782c5..a28be3b490 100644 --- a/recipes/lupa.recipe +++ b/recipes/lupa.recipe @@ -19,9 +19,9 @@ class LupaCZ(BasicNewsRecipe): publication_type = 'newsportal' no_stylesheets = True remove_javascript = True - extra_css = 'p.perex{font-size: 1.2em;margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \ - p.perex img {display:none;} \ - .urs p {margin: 0 0 0.8em 0;}' + extra_css = '''p.perex{font-size: 1.2em;margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} + p.perex img {display:none;} + .urs p {margin: 0 0 0.8em 0;}''' feeds = [ (u'Zpr\xe1vi\u010dky', u'http://rss.lupa.cz/zpravicky'), diff --git a/recipes/lwn.recipe b/recipes/lwn.recipe index cee5c92b3f..e21efdfb33 100644 --- a/recipes/lwn.recipe +++ b/recipes/lwn.recipe @@ -14,7 +14,6 @@ class LWN(BasicNewsRecipe): __author__ = 'Oliver Niesner' description = 'Linux Weekly News' oldest_article = 54 - language = _('English') max_articles_per_feed = 100 needs_subscription = True language = 'en' diff --git a/recipes/lwn_free.recipe b/recipes/lwn_free.recipe index 0a9c1d9993..7cc7c473a4 100644 --- a/recipes/lwn_free.recipe +++ b/recipes/lwn_free.recipe @@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class LWNFree(BasicNewsRecipe): - title = "LWN Linux Weekly News (Free)" + title = 'LWN Linux Weekly News (Free)' language = 'en' __author__ = 'yodha8' description = "LWN is published every Thursday. This recipe skips current week's articles (subscriber-only) and pulls free articles from previous week." @@ -18,7 +18,7 @@ class LWNFree(BasicNewsRecipe): ] def parse_feeds(self): - """Remove paid articles and articles older than a week.""" + '''Remove paid articles and articles older than a week.''' prev_feeds = super().parse_feeds() @@ -28,12 +28,12 @@ class LWNFree(BasicNewsRecipe): for article in prev_feeds[0]: # Paid article - if "[$]" in article.title: + if '[$]' in article.title: remove_articles.append(article) continue # Count how many free weekly edition we passed - if "Weekly Edition" in article.title: + if 'Weekly Edition' in article.title: weekly_count += 1 # Remove all articles starting from 2nd free weekly edition diff --git a/recipes/lwn_weekly.recipe b/recipes/lwn_weekly.recipe index 8917cbc423..162078db8b 100644 --- a/recipes/lwn_weekly.recipe +++ b/recipes/lwn_weekly.recipe @@ -137,14 +137,14 @@ class WeeklyLWN(BasicNewsRecipe): article_title = _('Undefined article title') if subsection: - section_title = "%s: %s" % (section, subsection) + section_title = '%s: %s' % (section, subsection) else: section_title = section # Most articles have anchors in their titles, *except* the # security vulnerabilities article_anchor = curr.find( - name='a', attrs={'href': re.compile('^/Articles/')}) + name='a', attrs={'href': re.compile(r'^/Articles/')}) if article_anchor: article_url = article_anchor.get('href') @@ -170,7 +170,7 @@ class WeeklyLWN(BasicNewsRecipe): }) else: - self.log.error("lwn_weekly.recipe: something bad happened; should not be able to reach this") + self.log.error('lwn_weekly.recipe: something bad happened; should not be able to reach this') ans = [(section2, articles[section2]) for section2 in ans if section2 in articles] diff --git a/recipes/mainichi.recipe b/recipes/mainichi.recipe index 69c0159996..4eaa4f02e5 100644 --- a/recipes/mainichi.recipe +++ b/recipes/mainichi.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python -""" +''' www.mainichi.jp -""" +''' from calibre.web.feeds.news 
import BasicNewsRecipe @@ -9,11 +9,11 @@ from calibre.web.feeds.news import BasicNewsRecipe class MainichiDailyNews(BasicNewsRecipe): title = u'\u6bce\u65e5\u65b0\u805e' __author__ = 'unkn0wn' - description = "Japanese traditional newspaper Mainichi Daily News" - publisher = "Mainichi News" - publication_type = "newspaper" - category = "news, japan" - language = "ja" + description = 'Japanese traditional newspaper Mainichi Daily News' + publisher = 'Mainichi News' + publication_type = 'newspaper' + category = 'news, japan' + language = 'ja' no_stylesheets = True remove_javascript = True diff --git a/recipes/mainichi_en.recipe b/recipes/mainichi_en.recipe index ec74c962df..6f083554e3 100644 --- a/recipes/mainichi_en.recipe +++ b/recipes/mainichi_en.recipe @@ -1,23 +1,23 @@ #!/usr/bin/env python -""" +''' www.mainichi.jp/english -""" +''' from calibre.web.feeds.news import BasicNewsRecipe class MainichiEnglishNews(BasicNewsRecipe): - title = u"The Mainichi" + title = u'The Mainichi' __author__ = 'unkn0wn' - description = "Japanese traditional newspaper Mainichi news in English" - publisher = "Mainichi News" - publication_type = "newspaper" - category = "news, japan" - language = "en_JP" + description = 'Japanese traditional newspaper Mainichi news in English' + publisher = 'Mainichi News' + publication_type = 'newspaper' + category = 'news, japan' + language = 'en_JP' - index = "http://mainichi.jp/english/" - masthead_url = index + "images/themainichi.png" + index = 'http://mainichi.jp/english/' + masthead_url = index + 'images/themainichi.png' no_stylesheets = True remove_javascript = True diff --git a/recipes/mainichi_science_news.recipe b/recipes/mainichi_science_news.recipe index d7ede8543b..381e145107 100644 --- a/recipes/mainichi_science_news.recipe +++ b/recipes/mainichi_science_news.recipe @@ -22,8 +22,8 @@ class MainichiDailyScienceNews(BasicNewsRecipe): remove_javascript = True masthead_title = u'MAINICHI DAILY NEWS' - remove_tags_before = {'class': "NewsTitle"} - remove_tags_after = {'class': "NewsBody clr"} + remove_tags_before = {'class': 'NewsTitle'} + remove_tags_after = {'class': 'NewsBody clr'} def parse_feeds(self): diff --git a/recipes/make_use_of.recipe b/recipes/make_use_of.recipe index 50cfae3de3..3546197709 100644 --- a/recipes/make_use_of.recipe +++ b/recipes/make_use_of.recipe @@ -20,28 +20,28 @@ class AdvancedUserRecipe1716102924(BasicNewsRecipe): auto_cleanup = True feeds = [ - #PC & Mobile + # PC & Mobile ('PC & Mobile', 'https://www.makeuseof.com/feed/category/pc-mobile/'), ('Windows', 'https://www.makeuseof.com/feed/category/windows/'), ('Mac', 'https://www.makeuseof.com/feed/category/mac/'), ('Linux', 'https://www.makeuseof.com/feed/category/linux/'), ('Android', 'https://www.makeuseof.com/feed/category/google-android/'), ('iOS', 'https://www.makeuseof.com/feed/category/ios/'), - #Internet + # Internet ('Internet', 'https://www.makeuseof.com/feed/category/web-based/'), ('Social Media', 'https://www.makeuseof.com/feed/category/social-media/'), ('Security', 'https://www.makeuseof.com/feed/category/security/'), ('Programming', 'https://www.makeuseof.com/feed/category/programming/'), - #Productivity + # Productivity ('Productivity', 'https://www.makeuseof.com/feed/category/productivity/'), ('Creative', 'https://www.makeuseof.com/feed/category/creative/'), ('DIY', 'https://www.makeuseof.com/feed/category/diy-projects/'), - #Lifestyle + # Lifestyle ('Lifestyle', 'https://www.makeuseof.com/feed/category/lifestyle/'), ('Smart Home', 
'https://www.makeuseof.com/feed/category/smart-home/'), ('Gaming', 'https://www.makeuseof.com/feed/category/games/'), ('Entertainment', 'https://www.makeuseof.com/feed/category/entertainment/'), - #Technology Explained + # Technology Explained ('Technology Explained', 'https://www.makeuseof.com/feed/category/technology-explained/'), ('Artificial Intelligence', 'https://www.makeuseof.com/feed/tag/artificial-intelligence/'), ('Tech Jargon', 'https://www.makeuseof.com/feed/tag/pc-jargon-terminology/'), diff --git a/recipes/marca.recipe b/recipes/marca.recipe index fd64c937f9..48c0b436f0 100644 --- a/recipes/marca.recipe +++ b/recipes/marca.recipe @@ -23,12 +23,12 @@ class Marca(BasicNewsRecipe): language = 'es' publication_type = 'newsportal' masthead_url = 'http://estaticos.marca.com/deporte/img/v3.0/img_marca-com.png' - extra_css = """ + extra_css = ''' body{font-family: Tahoma,Geneva,sans-serif} h1,h2,h3,h4,h5,h6{font-family: 'LatoBlack',Tahoma,Geneva,sans-serif} .cab_articulo h4 {font-family: Georgia,"Times New Roman",Times,serif} .antetitulo{text-transform: uppercase} - """ + ''' feeds = [(u'Portada', u'http://estaticos.marca.com/rss/portada.xml')] @@ -46,4 +46,4 @@ class Marca(BasicNewsRecipe): return soup def get_article_url(self, article): - return article.get('guid', None) + return article.get('guid', None) diff --git a/recipes/marctv.recipe b/recipes/marctv.recipe index ca5cbe5f84..87890a95a9 100644 --- a/recipes/marctv.recipe +++ b/recipes/marctv.recipe @@ -26,7 +26,7 @@ class MarcTVde(BasicNewsRecipe): remove_tags = [] - keep_only_tags = dict(name='div', attrs={'class': ["content"]}) + keep_only_tags = dict(name='div', attrs={'class': ['content']}) feeds = [ (u'Spiele', u'http://feeds.feedburner.com/marctv/spiele'), diff --git a/recipes/mateusz_czytania.recipe b/recipes/mateusz_czytania.recipe index 9717176efb..4a40aeb1f3 100644 --- a/recipes/mateusz_czytania.recipe +++ b/recipes/mateusz_czytania.recipe @@ -23,7 +23,6 @@ class czytania_mateusz(BasicNewsRecipe): remove_javascript = True simultaneous_downloads = 2 max_articles_per_feed = 100 - auto_cleanup = True feeds = [(u'Czytania', u'http://mateusz.pl/rss/czytania/')] diff --git a/recipes/mdj.recipe b/recipes/mdj.recipe index d454e2b5dd..ed12fef2e6 100644 --- a/recipes/mdj.recipe +++ b/recipes/mdj.recipe @@ -15,10 +15,10 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): auto_cleanup = True feeds = [ - ('Local News', 'http://www.mdjonline.com/search/?f=rss&t=article&c=news/local&l=50&s=start_time&sd=desc'), # noqa + ('Local News', 'http://www.mdjonline.com/search/?f=rss&t=article&c=news/local&l=50&s=start_time&sd=desc'), ('Sports', 'http://www.mdjonline.com/search/?f=rss&t=article&c=sports&l=50&s=start_time&sd=desc'), ('Obits', 'http://www.mdjonline.com/search/?f=rss&t=article&c=obituaries&l=50&s=start_time&sd=desc'), - ('Editorial & oped', 'http://www.mdjonline.com/search/?f=rss&t=article&c=opinion/mdj_editorials&l=50&s=start_time&sd=desc'), # noqa - ('Lifestyle', 'http://www.mdjonline.com/search/?f=rss&t=article&c=news/lifestyle&l=50&s=start_time&sd=desc'), # noqa - ('Blogs', 'http://www.mdjonline.com/search/?f=rss&t=article&c=opinion/blogs&l=50&s=start_time&sd=desc') # noqa + ('Editorial & oped', 'http://www.mdjonline.com/search/?f=rss&t=article&c=opinion/mdj_editorials&l=50&s=start_time&sd=desc'), + ('Lifestyle', 'http://www.mdjonline.com/search/?f=rss&t=article&c=news/lifestyle&l=50&s=start_time&sd=desc'), + ('Blogs', 'http://www.mdjonline.com/search/?f=rss&t=article&c=opinion/blogs&l=50&s=start_time&sd=desc') ] diff 
--git a/recipes/media2.recipe b/recipes/media2.recipe index b889c77552..a3a9bd555d 100644 --- a/recipes/media2.recipe +++ b/recipes/media2.recipe @@ -10,7 +10,7 @@ class media2_pl(BasicNewsRecipe): title = u'Media2' __author__ = 'teepel ' language = 'pl' - description = u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.' # noqa + description = u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.' # noqa: E501 masthead_url = 'http://media2.pl/res/logo/www.png' cover_url = 'http://media2.pl/res/logo/www.png' INDEX = 'http://media2.pl' diff --git a/recipes/media_zone.recipe b/recipes/media_zone.recipe index a55d65d10d..ddf315b644 100644 --- a/recipes/media_zone.recipe +++ b/recipes/media_zone.recipe @@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class MediaZona(BasicNewsRecipe): title = '\u041c\u0435\u0434\u0438\u0430\u0417\u043e\u043d\u0430' __author__ = 'bugmen00t' - description = 'Общественно-политическое издание, сделавшее акцент на функционировании закона в России. По мнению авторитетных медиаэкспертов, по цитируемости и посещаемости формат «Медиазоны» оказался ведущим форматом новостного издания в России 2015 года. «Медиазона» пишет о реально происходящем в России, первой улавливая векторы развития общества.' # noqa + description = 'Общественно-политическое издание, сделавшее акцент на функционировании закона в России. По мнению авторитетных медиаэкспертов, по цитируемости и посещаемости формат «Медиазоны» оказался ведущим форматом новостного издания в России 2015 года. «Медиазона» пишет о реально происходящем в России, первой улавливая векторы развития общества.' # noqa: E501 publisher = 'zona.media' category = 'news' cover_url = u'https://zona.media/s/share/default_mz.png' diff --git a/recipes/mediaindonesia.recipe b/recipes/mediaindonesia.recipe index e282ac450d..4f5dd5b740 100644 --- a/recipes/mediaindonesia.recipe +++ b/recipes/mediaindonesia.recipe @@ -24,7 +24,7 @@ class Media(BasicNewsRecipe): no_javascript = True remove_tags = [dict(id=['atas', 'merah', 'putih']), dict(name='a')] - remove_tags_after = [dict(id="putih")] + remove_tags_after = [dict(id='putih')] extra_css = ''' .judul {font-size: x-large;} diff --git a/recipes/mediapart.recipe b/recipes/mediapart.recipe index 239e385c06..9aa66689c6 100644 --- a/recipes/mediapart.recipe +++ b/recipes/mediapart.recipe @@ -14,7 +14,7 @@ from __future__ import unicode_literals __license__ = 'GPL v3' -__copyright__ = '2021, Loïc Houpert . Adapted from: 2016, Daniel Bonnery; 2009, Mathieu Godlewski; 2010-2012, Louis Gesbert' # noqa +__copyright__ = '2021, Loïc Houpert . 
Adapted from: 2016, Daniel Bonnery; 2009, Mathieu Godlewski; 2010-2012, Louis Gesbert' ''' Mediapart ''' @@ -49,7 +49,7 @@ class Mediapart(BasicNewsRecipe): conversion_options = {'smarten_punctuation': True} - masthead_url = "https://raw.githubusercontent.com/lhoupert/calibre_contrib/main/mediapart_masthead.png" + masthead_url = 'https://raw.githubusercontent.com/lhoupert/calibre_contrib/main/mediapart_masthead.png' ignore_duplicate_articles = {'title'} resolve_internal_links = True @@ -65,7 +65,7 @@ class Mediapart(BasicNewsRecipe): url = e.hdrs.get('location') soup = self.index_to_soup(url) link = soup.find('a', href=True) - skip_sections =[ # add sections you want to skip + skip_sections =[ # add sections you want to skip '/video/', '/videos/', '/media/' ] if any(x in link['href'] for x in skip_sections): @@ -95,7 +95,7 @@ class Mediapart(BasicNewsRecipe): for feed in feeds: feed_name = feed.title.lower() for article in feed.articles: - if feed_name != 'autres' and feed_name not in article.url: + if feed_name != 'autres' and feed_name not in article.url: feed.articles.remove(article) if feed_name == 'autres' and any(section in article.url for section in self.sections): feed.articles.remove(article) diff --git a/recipes/medscape.recipe b/recipes/medscape.recipe index 098f7d54de..68cd93ebd9 100644 --- a/recipes/medscape.recipe +++ b/recipes/medscape.recipe @@ -56,7 +56,7 @@ class MedScrape(BasicNewsRecipe): # the original url is: http://www.medscape.com/viewarticle/728955?src=rss # the print url is: http://www.medscape.com/viewarticle/728955_print print_url = url.partition('?')[0] + '_print' - # print 'the printable version is: ',print_url + # print('the printable version is: ',print_url) return print_url def preprocess_html(self, soup): diff --git a/recipes/meduza.recipe b/recipes/meduza.recipe index d3ff8e0036..dc0e01c724 100644 --- a/recipes/meduza.recipe +++ b/recipes/meduza.recipe @@ -6,14 +6,14 @@ from calibre.web.feeds.news import BasicNewsRecipe class Meduza(BasicNewsRecipe): title = u'Meduza' - description = u'Russian- and English-language independent news website, based in Latvia. It was founded in 2014 by a group of former employees of the then independent Lenta.ru news website. Every day we bring you the most important news and feature stories from hundreds of sources in Russia and across the former Soviet Union.' # noqa + description = u'Russian- and English-language independent news website, based in Latvia. It was founded in 2014 by a group of former employees of the then independent Lenta.ru news website. Every day we bring you the most important news and feature stories from hundreds of sources in Russia and across the former Soviet Union.' 
# noqa: E501 __author__ = 'bugmen00t' publisher = 'Medusa Project SIA' publication_type = 'blog' oldest_article = 21 max_articles_per_feed = 100 language = 'en_RU' - cover_url = 'https://meduza.io/impro/E_cJMv0IQxOC45z-YXeGuzuPB2kQ_A1XsZYrdByOCnk/fill/1200/0/ce/0/aHR0cHM6Ly9tZWR1/emEuaW8vaW1hZ2Uv/YXR0YWNobWVudHMv/aW1hZ2VzLzAwNi83/MTgvODcyL29yaWdp/bmFsLzVPSmRDdWc1/bC1JVG9lTXBqSHFH/ZXcucG5n.png' # noqa + cover_url = 'https://meduza.io/impro/E_cJMv0IQxOC45z-YXeGuzuPB2kQ_A1XsZYrdByOCnk/fill/1200/0/ce/0/aHR0cHM6Ly9tZWR1/emEuaW8vaW1hZ2Uv/YXR0YWNobWVudHMv/aW1hZ2VzLzAwNi83/MTgvODcyL29yaWdp/bmFsLzVPSmRDdWc1/bC1JVG9lTXBqSHFH/ZXcucG5n.png' # noqa: E501 auto_cleanup = False no_stylesheets = False diff --git a/recipes/meduza_ru.recipe b/recipes/meduza_ru.recipe index dcecb40686..4943df13cc 100644 --- a/recipes/meduza_ru.recipe +++ b/recipes/meduza_ru.recipe @@ -6,14 +6,14 @@ from calibre.web.feeds.news import BasicNewsRecipe class Meduza(BasicNewsRecipe): title = u'Meduza' - description = u'\u041C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u043E\u0435 \u0440\u0443\u0441\u0441\u043A\u043E\u044F\u0437\u044B\u0447\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435. \u041C\u044B \u0432\u044B\u0431\u0438\u0440\u0430\u0435\u043C \u0434\u043B\u044F \u0432\u0430\u0441 \u0441\u0430\u043C\u044B\u0435 \u0432\u0430\u0436\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0438 \u0433\u043E\u0442\u043E\u0432\u0438\u043C \u043B\u0443\u0447\u0448\u0438\u0435 \u0442\u0435\u043A\u0441\u0442\u044B \u043E \u0442\u043E\u043C, \u0447\u0442\u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u0438\u0442 \u0432 \u0420\u043E\u0441\u0441\u0438\u0438 \u0438 \u043C\u0438\u0440\u0435.' # noqa + description = u'\u041C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u043E\u0435 \u0440\u0443\u0441\u0441\u043A\u043E\u044F\u0437\u044B\u0447\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435. \u041C\u044B \u0432\u044B\u0431\u0438\u0440\u0430\u0435\u043C \u0434\u043B\u044F \u0432\u0430\u0441 \u0441\u0430\u043C\u044B\u0435 \u0432\u0430\u0436\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0438 \u0433\u043E\u0442\u043E\u0432\u0438\u043C \u043B\u0443\u0447\u0448\u0438\u0435 \u0442\u0435\u043A\u0441\u0442\u044B \u043E \u0442\u043E\u043C, \u0447\u0442\u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u0438\u0442 \u0432 \u0420\u043E\u0441\u0441\u0438\u0438 \u0438 \u043C\u0438\u0440\u0435.' 
# noqa: E501 __author__ = 'bugmen00t' publisher = 'Medusa Project SIA' publication_type = 'blog' oldest_article = 7 max_articles_per_feed = 100 language = 'ru' - cover_url = 'https://meduza.io/impro/E_cJMv0IQxOC45z-YXeGuzuPB2kQ_A1XsZYrdByOCnk/fill/1200/0/ce/0/aHR0cHM6Ly9tZWR1/emEuaW8vaW1hZ2Uv/YXR0YWNobWVudHMv/aW1hZ2VzLzAwNi83/MTgvODcyL29yaWdp/bmFsLzVPSmRDdWc1/bC1JVG9lTXBqSHFH/ZXcucG5n.png' # noqa + cover_url = 'https://meduza.io/impro/E_cJMv0IQxOC45z-YXeGuzuPB2kQ_A1XsZYrdByOCnk/fill/1200/0/ce/0/aHR0cHM6Ly9tZWR1/emEuaW8vaW1hZ2Uv/YXR0YWNobWVudHMv/aW1hZ2VzLzAwNi83/MTgvODcyL29yaWdp/bmFsLzVPSmRDdWc1/bC1JVG9lTXBqSHFH/ZXcucG5n.png' # noqa: E501 auto_cleanup = False no_stylesheets = False diff --git a/recipes/mel.recipe b/recipes/mel.recipe index 44430b58d0..b76cae8a4f 100644 --- a/recipes/mel.recipe +++ b/recipes/mel.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Mel(BasicNewsRecipe): title = '\u041C\u0435\u043B' __author__ = 'bugmen00t' - description = '\u041C\u0435\u0434\u0438\u0430 \u043F\u0440\u043E \u043E\u0431\u0440\u0430\u0437\u043E\u0432\u0430\u043D\u0438\u0435 \u0438 \u0432\u043E\u0441\u043F\u0438\u0442\u0430\u043D\u0438\u0435 \u0434\u0435\u0442\u0435\u0439' # noqa + description = '\u041C\u0435\u0434\u0438\u0430 \u043F\u0440\u043E \u043E\u0431\u0440\u0430\u0437\u043E\u0432\u0430\u043D\u0438\u0435 \u0438 \u0432\u043E\u0441\u043F\u0438\u0442\u0430\u043D\u0438\u0435 \u0434\u0435\u0442\u0435\u0439' # noqa: E501 publisher = '\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u00AB\u041C\u0435\u043B\u00BB' category = 'blog' cover_url = u'https://static.mel.fm/images/project/site/dummyLogo.png' diff --git a/recipes/merco_press.recipe b/recipes/merco_press.recipe index 88d1eaaaa5..77433a188c 100644 --- a/recipes/merco_press.recipe +++ b/recipes/merco_press.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class MercoPress(BasicNewsRecipe): title = u'Merco Press' - description = u"Read News, Stories and Insight Analysis from Latin America and Mercosur. Politics, Economy, Business and Investments in South America." + description = u'Read News, Stories and Insight Analysis from Latin America and Mercosur. Politics, Economy, Business and Investments in South America.' 
cover_url = 'http://en.mercopress.com/web/img/en/mercopress-logo.gif' __author__ = 'Russell Phillips' diff --git a/recipes/mesec.recipe b/recipes/mesec.recipe index db4f1e92e0..38149bf1fa 100644 --- a/recipes/mesec.recipe +++ b/recipes/mesec.recipe @@ -19,9 +19,9 @@ class MesecCZ(BasicNewsRecipe): publication_type = 'newsportal' no_stylesheets = True remove_javascript = True - extra_css = 'p.perex{font-size: 1.2em;margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \ - p.perex img {display:none;} \ - .urs p {margin: 0 0 0.8em 0;}' + extra_css = '''p.perex{font-size: 1.2em;margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} + p.perex img {display:none;} + .urs p {margin: 0 0 0.8em 0;}''' feeds = [ (u'Aktuality', u'http://www.mesec.cz/rss/aktuality/'), diff --git a/recipes/miami_herald.recipe b/recipes/miami_herald.recipe index 25965b0f4e..ed220abc3a 100644 --- a/recipes/miami_herald.recipe +++ b/recipes/miami_herald.recipe @@ -17,7 +17,7 @@ def classes(classes): class TheMiamiHerald(BasicNewsRecipe): title = 'The Miami Herald' __author__ = 'Kovid Goyal' - description = "Miami-Dade and Broward's source for the latest breaking local news on sports, weather, business, jobs, real estate, shopping, health, travel, entertainment, & more." # noqa + description = "Miami-Dade and Broward's source for the latest breaking local news on sports, weather, business, jobs, real estate, shopping, health, travel, entertainment, & more." # noqa: E501 oldest_article = 1 max_articles_per_feed = 100 publisher = u'The Miami Herald' diff --git a/recipes/mit_technology_review.recipe b/recipes/mit_technology_review.recipe index dc14af12e2..225ed5c498 100644 --- a/recipes/mit_technology_review.recipe +++ b/recipes/mit_technology_review.recipe @@ -19,7 +19,7 @@ def absurl(x): if x.startswith('//'): x = 'http:' + x elif not x.startswith('http'): - x = "http://www.technologyreview.com" + x + x = 'http://www.technologyreview.com' + x return x @@ -58,8 +58,8 @@ class MitTechnologyReview(BasicNewsRecipe): prefixed_classes('contentHeader contentArticleHeader contentBody') ] remove_tags = [ - dict(name="aside"), - dict(name="svg"), + dict(name='aside'), + dict(name='svg'), prefixed_classes( 'image__placeholder sliderAd__wrapper eyebrow__wrap-- screen-reader-text' ), @@ -83,7 +83,7 @@ class MitTechnologyReview(BasicNewsRecipe): if script := soup.find('script', id='preload'): raw = script.contents[0] m = re.search(r'\"children\":\[{\"name\":\"magazine-hero\"', raw) - spl = re.split(r"(?=\{)", raw[m.start():], 1)[1] + spl = re.split(r'(?=\{)', raw[m.start():], 1)[1] data = json.JSONDecoder().raw_decode(spl)[0] self.cover_url = data['children'][0]['config']['src'] + '?fit=572,786' self.timefmt = ' [' + data['config']['issueDate'] + ']' @@ -94,7 +94,7 @@ class MitTechnologyReview(BasicNewsRecipe): feeds = OrderedDict() classNamePrefixes = [ - "magazineHero__letter--", "teaserItem__title", "teaserItem--aside__title" + 'magazineHero__letter--', 'teaserItem__title', 'teaserItem--aside__title' ] for div in soup.findAll( attrs={ @@ -151,7 +151,7 @@ class MitTechnologyReview(BasicNewsRecipe): if section_title not in feeds: feeds[section_title] = [] feeds[section_title] += articles - ans = [(key, val) for key, val in feeds.items()] + ans = list(feeds.items()) return ans def preprocess_html(self, soup): diff --git a/recipes/mmc_rtv.recipe b/recipes/mmc_rtv.recipe index 67bc888d90..d39266c3e5 100644 --- a/recipes/mmc_rtv.recipe +++ b/recipes/mmc_rtv.recipe @@ -10,7 +10,7 @@ from 
calibre.web.feeds.news import BasicNewsRecipe class MMCRTV(BasicNewsRecipe): title = u'MMC RTV Slovenija' __author__ = u'TadejS' - description = u"Prvi interaktivni multimedijski portal, MMC RTV Slovenija" + description = u'Prvi interaktivni multimedijski portal, MMC RTV Slovenija' oldest_article = 3 max_articles_per_feed = 100 language = 'sl' diff --git a/recipes/modoros.recipe b/recipes/modoros.recipe index a934eb8427..c9d1269630 100644 --- a/recipes/modoros.recipe +++ b/recipes/modoros.recipe @@ -15,7 +15,7 @@ from hashlib import md5 class ModorosBlogHu(BasicNewsRecipe): __author__ = 'Zsolt Botykai' title = u'Modoros Blog' - description = u"Modoros.blog.hu" + description = u'Modoros.blog.hu' oldest_article = 10000 max_articles_per_feed = 10000 reverse_article_order = True @@ -24,7 +24,6 @@ class ModorosBlogHu(BasicNewsRecipe): remove_empty_feeds = True no_stylesheets = True feeds = [(u'Modoros Blog', u'http://modoros.blog.hu/rss')] - remove_javascript = True use_embedded_content = False preprocess_regexps = [ (re.compile(r'.*?', re.DOTALL | re.IGNORECASE), diff --git a/recipes/moneycontrol.recipe b/recipes/moneycontrol.recipe index 98d7c15350..68f422a475 100644 --- a/recipes/moneycontrol.recipe +++ b/recipes/moneycontrol.recipe @@ -16,7 +16,7 @@ class MoneyControlRecipe(BasicNewsRecipe): ignore_duplicate_articles = {'title', 'url'} remove_empty_feeds = True resolve_internal_links = True - oldest_article = 1 # days + oldest_article = 1 # days extra_css = ''' img {display:block; margin:0 auto;} diff --git a/recipes/montreal_gazette.recipe b/recipes/montreal_gazette.recipe index 6701af2093..5b9bc97f92 100644 --- a/recipes/montreal_gazette.recipe +++ b/recipes/montreal_gazette.recipe @@ -60,44 +60,44 @@ class CanWestPaper(BasicNewsRecipe): ] # un-comment the following six lines for the Vancouver Province - # title = u'Vancouver Province' - # url_prefix = 'http://www.theprovince.com' - # description = u'News from Vancouver, BC' - # std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg' - # logo_url = 'vplogo.jpg' - # fp_tag = 'CAN_TP' + # # title = u'Vancouver Province' + # # url_prefix = 'http://www.theprovince.com' + # # description = u'News from Vancouver, BC' + # # std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg' + # # logo_url = 'vplogo.jpg' + # # fp_tag = 'CAN_TP' # un-comment the following six lines for the Vancouver Sun - # title = u'Vancouver Sun' - # url_prefix = 'http://www.vancouversun.com' - # description = u'News from Vancouver, BC' - # std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg' - # logo_url = 'vslogo.jpg' - # fp_tag = 'CAN_VS' + # # title = u'Vancouver Sun' + # # url_prefix = 'http://www.vancouversun.com' + # # description = u'News from Vancouver, BC' + # # std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg' + # # logo_url = 'vslogo.jpg' + # # fp_tag = 'CAN_VS' # un-comment the following six lines for the Calgary Herald - # title = u'Calgary Herald' - # url_prefix = 'http://www.calgaryherald.com' - # description = u'News from Calgary, AB' - # std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg' - # logo_url = 'chlogo.jpg' - # fp_tag = 'CAN_CH' + # # title = u'Calgary Herald' + # # url_prefix = 'http://www.calgaryherald.com' + # # description = u'News from Calgary, AB' + # # std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg' + # # logo_url = 'chlogo.jpg' + # # fp_tag = 'CAN_CH' # un-comment the following six lines for the Edmonton Journal 
- # title = u'Edmonton Journal' - # url_prefix = 'http://www.edmontonjournal.com' - # description = u'News from Edmonton, AB' - # std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg' - # logo_url = 'ejlogo.jpg' - # fp_tag = 'CAN_EJ' + # # title = u'Edmonton Journal' + # # url_prefix = 'http://www.edmontonjournal.com' + # # description = u'News from Edmonton, AB' + # # std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg' + # # logo_url = 'ejlogo.jpg' + # # fp_tag = 'CAN_EJ' # un-comment the following six lines for the Ottawa Citizen - # title = u'Ottawa Citizen' - # url_prefix = 'http://www.ottawacitizen.com' - # description = u'News from Ottawa, ON' - # std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg' - # logo_url = 'oclogo.jpg' - # fp_tag = 'CAN_OC' + # # title = u'Ottawa Citizen' + # # url_prefix = 'http://www.ottawacitizen.com' + # # description = u'News from Ottawa, ON' + # # std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg' + # # logo_url = 'oclogo.jpg' + # # fp_tag = 'CAN_OC' # un-comment the following six lines for the Montreal Gazette title = u'Montreal Gazette' @@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe): .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } #photocredit { font-size: xx-small; font-weight: normal; }''' - keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})] + keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})] remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'}, dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict( @@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe): name='div', attrs={'class': 'copyright'}), dict(name='div', attrs={'class': 'rule_grey_solid'}), dict(name='div', attrs={'id': 'soundoff'}), - dict(name='div', attrs={'id': re.compile('flyer')}), + dict(name='div', attrs={'id': re.compile(r'flyer')}), dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})] def get_cover_url(self): @@ -154,8 +154,7 @@ class CanWestPaper(BasicNewsRecipe): except: while daysback < 7: cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg' + \ - str((date.today() - timedelta(days=daysback)).day) + \ - '/lg/' + self.fp_tag + '.jpg' + str((date.today() - timedelta(days=daysback)).day) + '/lg/' + self.fp_tag + '.jpg' br = BasicNewsRecipe.get_browser(self) try: br.open(cover) @@ -164,24 +163,24 @@ class CanWestPaper(BasicNewsRecipe): continue break if daysback == 7: - self.log("\nCover unavailable") + self.log('\nCover unavailable') cover = None return cover def fixChars(self, string): # Replace lsquo (\x91) - fixed = re.sub("\x91", "‘", string) + fixed = string.replace('\x91', '‘') # Replace rsquo (\x92) - fixed = re.sub("\x92", "’", fixed) + fixed = fixed.replace('\x92', '’') # Replace ldquo (\x93) - fixed = re.sub("\x93", "“", fixed) + fixed = fixed.replace('\x93', '“') # Replace rdquo (\x94) - fixed = re.sub("\x94", "”", fixed) + fixed = fixed.replace('\x94', '”') # Replace ndash (\x96) - fixed = re.sub("\x96", "–", fixed) + fixed = fixed.replace('\x96', '–') # Replace mdash (\x97) - fixed = re.sub("\x97", "—", fixed) - fixed = re.sub("&#x2019;", "’", fixed) + fixed = fixed.replace('\x97', '—') + fixed = fixed.replace('&#x2019;', '’') return fixed def massageNCXText(self, description): @@ -214,7 +213,7 @@ class
CanWestPaper(BasicNewsRecipe): divtags = soup.findAll('div', attrs={'id': ''}) if divtags: for div in divtags: - del(div['id']) + del div['id'] pgall = soup.find('div', attrs={'id': 'storyphoto'}) if pgall is not None: # photo gallery perhaps @@ -262,10 +261,10 @@ class CanWestPaper(BasicNewsRecipe): if url.startswith('/'): url = self.url_prefix + url if not url.startswith(self.url_prefix): - print("Rejected " + url) + print('Rejected ' + url) return if url in self.url_list: - print("Rejected dup " + url) + print('Rejected dup ' + url) return self.url_list.append(url) title = self.tag_to_string(atag, False) @@ -277,8 +276,8 @@ class CanWestPaper(BasicNewsRecipe): return dtag = adiv.find('div', 'content') description = '' - print("URL " + url) - print("TITLE " + title) + print('URL ' + url) + print('TITLE ' + title) if dtag is not None: stag = dtag.span if stag is not None: @@ -286,18 +285,18 @@ class CanWestPaper(BasicNewsRecipe): description = self.tag_to_string(stag, False) else: description = self.tag_to_string(dtag, False) - print("DESCRIPTION: " + description) + print('DESCRIPTION: ' + description) if key not in articles: articles[key] = [] articles[key].append(dict( title=title, url=url, date='', description=description, author='', content='')) def parse_web_index(key, keyurl): - print("Section: " + key + ': ' + self.url_prefix + keyurl) + print('Section: ' + key + ': ' + self.url_prefix + keyurl) try: soup = self.index_to_soup(self.url_prefix + keyurl) except: - print("Section: " + key + ' NOT FOUND') + print('Section: ' + key + ' NOT FOUND') return ans.append(key) mainsoup = soup.find('div', 'bodywrapper') @@ -309,7 +308,7 @@ class CanWestPaper(BasicNewsRecipe): for wdiv in mainsoup.findAll(attrs={'class': ['headline', 'featurecontent']}): handle_article(wdiv, key) - for (k, url) in self.postmedia_index_pages: + for k,url in self.postmedia_index_pages: parse_web_index(k, url) ans = [(key, articles[key]) for key in ans if key in articles] return ans diff --git a/recipes/moscow_times.recipe b/recipes/moscow_times.recipe index 21004cbe0e..6f78caa6e3 100644 --- a/recipes/moscow_times.recipe +++ b/recipes/moscow_times.recipe @@ -10,8 +10,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class Moscowtimes(BasicNewsRecipe): title = 'The Moscow Times (light version)' __author__ = 'Darko Miletic and Sujata Raman' - description = 'The Moscow Times is a daily English-language newspaper featuring objective, reliable news on business, politics, sports and culture in Moscow, in Russia and the former Soviet Union (CIS).' # noqa - category = 'Russia, Moscow, Russian news, Moscow news, Russian newspaper, daily news, independent news, reliable news, USSR, Soviet Union, CIS, Russian politics, Russian business, Russian culture, Russian opinion, St Petersburg, Saint Petersburg' # noqa + description = 'The Moscow Times is a daily English-language newspaper featuring objective, reliable news on business, politics, sports and culture in Moscow, in Russia and the former Soviet Union (CIS).' 
# noqa: E501 + category = 'Russia, Moscow, Russian news, Moscow news, Russian newspaper, daily news, independent news, reliable news, USSR, Soviet Union, CIS, Russian politics, Russian business, Russian culture, Russian opinion, St Petersburg, Saint Petersburg' # noqa: E501 publisher = 'The Moscow Times' language = 'en_RU' oldest_article = 2 diff --git a/recipes/moscowtimes_en.recipe b/recipes/moscowtimes_en.recipe index 9fca9a29c2..8ab1832838 100644 --- a/recipes/moscowtimes_en.recipe +++ b/recipes/moscowtimes_en.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class MoscowTimes(BasicNewsRecipe): title = 'The Moscow Times' __author__ = 'bugmen00t' - description = 'The Moscow Times is Russia’s leading, independent English-language media outlet. Our team of Russian and English journalists provide readers across the world with breaking news, engaging stories and balanced reporting about the largest country on Earth.' # noqa + description = 'The Moscow Times is Russia’s leading, independent English-language media outlet. Our team of Russian and English journalists provide readers across the world with breaking news, engaging stories and balanced reporting about the largest country on Earth.' # noqa: E501 publisher = 'Tiamti LLC' category = 'newspaper' cover_url = u'https://static.themoscowtimes.com/img/share_default.jpg' @@ -22,7 +22,7 @@ class MoscowTimes(BasicNewsRecipe): remove_tags_after = dict(name='div', attrs={'class': 'article__tags'}) - remove_tags = [ + remove_tags = [ dict(name='aside'), dict(name='footer'), dict(name='section', attrs={'class': 'cluster'}), diff --git a/recipes/moscowtimes_ru.recipe b/recipes/moscowtimes_ru.recipe index 318b335f36..d8a16fcd52 100644 --- a/recipes/moscowtimes_ru.recipe +++ b/recipes/moscowtimes_ru.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class MoscowTimes(BasicNewsRecipe): title = 'The Moscow Times' __author__ = 'bugmen00t' - description = '\u0410\u043D\u0433\u043B\u043E\u044F\u0437\u044B\u0447\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u0420\u043E\u0441\u0441\u0438\u0438. \u041D\u0430\u0448\u0430 \u0440\u0435\u0434\u0430\u043A\u0446\u0438\u044F \u0434\u0435\u043B\u0438\u0442\u0441\u044F \u0441 \u0447\u0438\u0442\u0430\u0442\u0435\u043B\u044F\u043C\u0438 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u043F\u043E\u0441\u043B\u0435\u0434\u043D\u0438\u043C\u0438 \u043D\u043E\u0432\u043E\u0441\u0442\u044F\u043C\u0438 \u0438 \u0441\u0430\u043C\u044B\u043C\u0438 \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u044B\u043C\u0438 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u043C\u0438 \u0441\u0430\u043C\u043E\u0439 \u0431\u043E\u043B\u044C\u0448\u043E\u0439 \u0441\u0442\u0440\u0430\u043D\u044B \u043D\u0430 \u0417\u0435\u043C\u043B\u0435.' # noqa + description = '\u0410\u043D\u0433\u043B\u043E\u044F\u0437\u044B\u0447\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u0420\u043E\u0441\u0441\u0438\u0438. 
\u041D\u0430\u0448\u0430 \u0440\u0435\u0434\u0430\u043A\u0446\u0438\u044F \u0434\u0435\u043B\u0438\u0442\u0441\u044F \u0441 \u0447\u0438\u0442\u0430\u0442\u0435\u043B\u044F\u043C\u0438 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u043C\u0438\u0440\u0443 \u043F\u043E\u0441\u043B\u0435\u0434\u043D\u0438\u043C\u0438 \u043D\u043E\u0432\u043E\u0441\u0442\u044F\u043C\u0438 \u0438 \u0441\u0430\u043C\u044B\u043C\u0438 \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u044B\u043C\u0438 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u043C\u0438 \u0441\u0430\u043C\u043E\u0439 \u0431\u043E\u043B\u044C\u0448\u043E\u0439 \u0441\u0442\u0440\u0430\u043D\u044B \u043D\u0430 \u0417\u0435\u043C\u043B\u0435.' # noqa: E501 publisher = '\u041E\u041E\u041E \u0422\u0438\u044D\u043C\u0442\u0438' category = 'newspaper' cover_url = u'https://static.themoscowtimes.com/img/share_default.jpg' diff --git a/recipes/movie_web.recipe b/recipes/movie_web.recipe index bf6231e896..6030346cdf 100644 --- a/recipes/movie_web.recipe +++ b/recipes/movie_web.recipe @@ -23,20 +23,20 @@ class AdvancedUserRecipe1716109041(BasicNewsRecipe): auto_cleanup = True feeds = [ - #News feeds + # News feeds ('Trailers', 'https://movieweb.com/feed/trailers/'), ('Movie News', 'https://movieweb.com/feed/movie-news/'), ('TV News', 'https://movieweb.com/feed/tv-news/'), ('Marvel Cinematic Universe News', 'https://movieweb.com/feed/mcu-news/'), ('DC Universe News', 'https://movieweb.com/feed/tag/dcu-news/'), - #Genre feeds + # Genre feeds ('Action', 'https://movieweb.com/feed/tag/action/'), ('Comedy', 'https://movieweb.com/feed/tag/comedy/'), ('Fantasy', 'https://movieweb.com/feed/tag/fantasy/'), ('Horror', 'https://movieweb.com/feed/tag/horror/'), ('Sci-Fi', 'https://movieweb.com/feed/tag/sci-fi/'), ('Western', 'https://movieweb.com/feed/tag/western/'), - #What to Watch feeds + # What to Watch feeds ('Apple TV+', 'https://movieweb.com/feed/tag/apple-tv-plus/'), ('Criterion Collection', 'https://movieweb.com/feed/tag/criterion-collection/'), ('Disney+', 'https://movieweb.com/feed/tag/disney-plus/'), diff --git a/recipes/mwjournal.recipe b/recipes/mwjournal.recipe index d9ba4161b1..c1ab7555bc 100644 --- a/recipes/mwjournal.recipe +++ b/recipes/mwjournal.recipe @@ -49,24 +49,24 @@ class MWJournal(BasicNewsRecipe): 'valign', 'vspace', 'hspace', 'alt', 'width', 'height'] # Specify extra CSS - overrides ALL other CSS (IE. Added last). 
- extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \ - .introduction, .first { font-weight: bold; } \ - .cross-head { font-weight: bold; font-size: 125%; } \ - .cap, .caption { display: block; font-size: 80%; font-style: italic; } \ - .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \ - .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \ - .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \ - font-size: 80%; font-style: italic; margin: 1px auto; } \ - .story-date, .published { font-size: 80%; } \ - table { width: 100%; } \ - td img { display: block; margin: 5px auto; } \ - ul { padding-top: 10px; } \ - ol { padding-top: 10px; } \ - li { padding-top: 5px; padding-bottom: 5px; } \ - h1 { font-size: 175%; font-weight: bold; } \ - h2 { font-size: 150%; font-weight: bold; } \ - h3 { font-size: 125%; font-weight: bold; } \ - h4, h5, h6 { font-size: 100%; font-weight: bold; }' + extra_css = '''body { font-family: verdana, helvetica, sans-serif; } + .introduction, .first { font-weight: bold; } + .cross-head { font-weight: bold; font-size: 125%; } + .cap, .caption { display: block; font-size: 80%; font-style: italic; } + .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } + .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, + .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; + font-size: 80%; font-style: italic; margin: 1px auto; } + .story-date, .published { font-size: 80%; } + table { width: 100%; } + td img { display: block; margin: 5px auto; } + ul { padding-top: 10px; } + ol { padding-top: 10px; } + li { padding-top: 5px; padding-bottom: 5px; } + h1 { font-size: 175%; font-weight: bold; } + h2 { font-size: 150%; font-weight: bold; } + h3 { font-size: 125%; font-weight: bold; } + h4, h5, h6 { font-size: 100%; font-weight: bold; }''' # Remove the line breaks, href links and float left/right and picture # width/height. diff --git a/recipes/myapple_pl.recipe b/recipes/myapple_pl.recipe index 8e28e8ff89..46bc27eab0 100644 --- a/recipes/myapple_pl.recipe +++ b/recipes/myapple_pl.recipe @@ -12,7 +12,6 @@ class MyAppleRecipe(BasicNewsRecipe): description = u' Największy w Polsce serwis zajmujący się tematyką związaną z Apple i wszelkimi produktami tej firmy.' cover_url = '' remove_empty_feeds = True - no_stylesheets = True oldest_article = 7 max_articles_per_feed = 100000 recursions = 0 diff --git a/recipes/n_kaliningrad.recipe b/recipes/n_kaliningrad.recipe index 2d7ea1bfdf..b985043115 100644 --- a/recipes/n_kaliningrad.recipe +++ b/recipes/n_kaliningrad.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class NewKaliningrad(BasicNewsRecipe): title = '\u041D\u043E\u0432\u044B\u0439 \u041A\u0430\u043B\u0438\u043D\u0438\u043D\u0433\u0440\u0430\u0434' __author__ = 'bugmen00t' - description = '\u0420\u0435\u0433\u0438\u043E\u043D\u0430\u043B\u044C\u043D\u044B\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u043F\u043E\u0440\u0442\u0430\u043B \u041A\u0430\u043B\u0438\u043D\u0438\u043D\u0433\u0440\u0430\u0434\u0441\u043A\u043E\u0439 \u043E\u0431\u043B\u0430\u0441\u0442\u0438 \u0438 \u0433\u043E\u0440\u043E\u0434\u0430 \u041A\u0430\u043B\u0438\u043D\u0438\u043D\u0433\u0440\u0430\u0434\u0430.' 
# noqa + description = '\u0420\u0435\u0433\u0438\u043E\u043D\u0430\u043B\u044C\u043D\u044B\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u043F\u043E\u0440\u0442\u0430\u043B \u041A\u0430\u043B\u0438\u043D\u0438\u043D\u0433\u0440\u0430\u0434\u0441\u043A\u043E\u0439 \u043E\u0431\u043B\u0430\u0441\u0442\u0438 \u0438 \u0433\u043E\u0440\u043E\u0434\u0430 \u041A\u0430\u043B\u0438\u043D\u0438\u043D\u0433\u0440\u0430\u0434\u0430.' # noqa: E501 publisher = '\u0417\u0410\u041E "\u0422\u0432\u0438\u043A \u041C\u0430\u0440\u043A\u0435\u0442\u0438\u043D\u0433"' category = 'news' cover_url = u'https://www.newkaliningrad.ru/images/logo.gif' diff --git a/recipes/n_plus_one.recipe b/recipes/n_plus_one.recipe index 89a5140ee6..1a4655fb66 100644 --- a/recipes/n_plus_one.recipe +++ b/recipes/n_plus_one.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1467724863(BasicNewsRecipe): title = 'N+1' __author__ = 'bugmen00t' - description = '\u041D\u0430\u0443\u0447\u043D\u043E-\u043F\u043E\u043F\u0443\u043B\u044F\u0440\u043D\u043E\u0435 \u0440\u0430\u0437\u0432\u043B\u0435\u043A\u0430\u0442\u0435\u043B\u044C\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u0442\u043E\u043C, \u0447\u0442\u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u0438\u0442 \u0432 \u043D\u0430\u0443\u043A\u0435, \u0442\u0435\u0445\u043D\u0438\u043A\u0435 \u0438 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F\u0445 \u043F\u0440\u044F\u043C\u043E \u0441\u0435\u0439\u0447\u0430\u0441. \u041D\u043E\u0432\u043E\u0441\u0442\u0438, \u0431\u043E\u043B\u044C\u0448\u0438\u0435 \u0441\u0442\u0430\u0442\u044C\u0438, \u0431\u043B\u043E\u0433\u0438 \u2014 \u044D\u0442\u043E \u0432\u0441\u0435 \u043F\u0440\u043E \u043D\u0430\u0441. \u041C\u044B \u0438\u0449\u0435\u043C \u0441\u0430\u043C\u043E\u0435 \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u043E\u0435 \u0438 \u0434\u043E\u0441\u0442\u0430\u0432\u043B\u044F\u0435\u043C \u044D\u0442\u043E \u0447\u0438\u0442\u0430\u0442\u0435\u043B\u044F\u043C \u0432 \u043F\u043E\u043D\u044F\u0442\u043D\u043E\u0439, \u044F\u0441\u043D\u043E\u0439, \u043F\u0440\u0438\u0432\u043B\u0435\u043A\u0430\u0442\u0435\u043B\u044C\u043D\u043E\u0439 (\u0438 \u0441 \u0432\u0438\u0437\u0443\u0430\u043B\u044C\u043D\u043E\u0439 \u0442\u043E\u0447\u043A\u0438 \u0437\u0440\u0435\u043D\u0438\u044F) \u0444\u043E\u0440\u043C\u0435. \u041C\u044B \u2014 \u0447\u0443\u0442\u044C \u0431\u043E\u043B\u044C\u0448\u0435, \u0447\u0435\u043C \u043F\u0440\u043E\u0441\u0442\u043E \u043D\u0430\u0443\u043A\u0430!' # noqa + description = '\u041D\u0430\u0443\u0447\u043D\u043E-\u043F\u043E\u043F\u0443\u043B\u044F\u0440\u043D\u043E\u0435 \u0440\u0430\u0437\u0432\u043B\u0435\u043A\u0430\u0442\u0435\u043B\u044C\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u0442\u043E\u043C, \u0447\u0442\u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u0438\u0442 \u0432 \u043D\u0430\u0443\u043A\u0435, \u0442\u0435\u0445\u043D\u0438\u043A\u0435 \u0438 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F\u0445 \u043F\u0440\u044F\u043C\u043E \u0441\u0435\u0439\u0447\u0430\u0441. \u041D\u043E\u0432\u043E\u0441\u0442\u0438, \u0431\u043E\u043B\u044C\u0448\u0438\u0435 \u0441\u0442\u0430\u0442\u044C\u0438, \u0431\u043B\u043E\u0433\u0438 \u2014 \u044D\u0442\u043E \u0432\u0441\u0435 \u043F\u0440\u043E \u043D\u0430\u0441. 
\u041C\u044B \u0438\u0449\u0435\u043C \u0441\u0430\u043C\u043E\u0435 \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u043E\u0435 \u0438 \u0434\u043E\u0441\u0442\u0430\u0432\u043B\u044F\u0435\u043C \u044D\u0442\u043E \u0447\u0438\u0442\u0430\u0442\u0435\u043B\u044F\u043C \u0432 \u043F\u043E\u043D\u044F\u0442\u043D\u043E\u0439, \u044F\u0441\u043D\u043E\u0439, \u043F\u0440\u0438\u0432\u043B\u0435\u043A\u0430\u0442\u0435\u043B\u044C\u043D\u043E\u0439 (\u0438 \u0441 \u0432\u0438\u0437\u0443\u0430\u043B\u044C\u043D\u043E\u0439 \u0442\u043E\u0447\u043A\u0438 \u0437\u0440\u0435\u043D\u0438\u044F) \u0444\u043E\u0440\u043C\u0435. \u041C\u044B \u2014 \u0447\u0443\u0442\u044C \u0431\u043E\u043B\u044C\u0448\u0435, \u0447\u0435\u043C \u043F\u0440\u043E\u0441\u0442\u043E \u043D\u0430\u0443\u043A\u0430!' # noqa: E501 publisher = 'N+1' category = 'news' cover_url = u'https://nplus1.ru/i/logo.png' diff --git a/recipes/nacional_cro.recipe b/recipes/nacional_cro.recipe index 96d3a1d2f6..6dd5f6d9ac 100644 --- a/recipes/nacional_cro.recipe +++ b/recipes/nacional_cro.recipe @@ -23,7 +23,7 @@ def new_tag(soup, name, attrs=()): class NacionalCro(BasicNewsRecipe): title = 'Nacional - Hr' __author__ = 'Darko Miletic' - description = "news from Croatia" + description = 'news from Croatia' publisher = 'Nacional.hr' category = 'news, politics, Croatia' oldest_article = 2 @@ -37,13 +37,13 @@ class NacionalCro(BasicNewsRecipe): lang = 'hr-HR' direction = 'ltr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' # noqa + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' # noqa: E501 conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True } - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] # noqa: RUF039 remove_tags = [dict(name=['object', 'link', 'embed'])] @@ -53,9 +53,9 @@ class NacionalCro(BasicNewsRecipe): soup.html['lang'] = self.lang soup.html['dir'] = self.direction mlang = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Language"), ("content", self.lang)]) + ('http-equiv', 'Content-Language'), ('content', self.lang)]) mcharset = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) + ('http-equiv', 'Content-Type'), ('content', 'text/html; charset=UTF-8')]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) for item in soup.findAll(style=True): diff --git a/recipes/nasa.recipe b/recipes/nasa.recipe index b1b5b5e7f8..f1c6702004 100644 --- a/recipes/nasa.recipe +++ b/recipes/nasa.recipe @@ -49,11 +49,11 @@ class NASA(BasicNewsRecipe): lambda match: match.group().replace(match.group(1), 'UTF-8')), # Remove any banners/links/ads/cruft before the body of the article. - (r')|(
…)|(…)|(…)|(…))', lambda match: '…'), # noqa + (r'…)|(…)|(…)|(…))', lambda match: '…'), # noqa: E501 # Remove any links/ads/comments/cruft from the end of the body # of the article. - (r'((…)|(…)|(… ©)|(…
- """ + ''' def parse_index(self): br = self.get_browser() - params = "" + params = '' if _issue_url: - month = urlparse(_issue_url).path.split("/")[-1] + month = urlparse(_issue_url).path.split('/')[-1] params = f'?{urlencode({"magazineTag": month})}' - res = br.open_novisit(f"https://newrepublic.com/api/content/magazine{params}") - magazine = json.loads(res.read().decode("utf-8"))["data"] + res = br.open_novisit(f'https://newrepublic.com/api/content/magazine{params}') + magazine = json.loads(res.read().decode('utf-8'))['data'] self.log.debug(f'Found issue: {magazine["metaData"]["issueTag"]["text"]}') self.timefmt = f': {magazine["metaData"]["issueTag"]["text"]}' - self.cover_url = urljoin(self.BASE_URL, magazine["metaData"]["image"]["src"]) + self.cover_url = urljoin(self.BASE_URL, magazine['metaData']['image']['src']) feed_articles = [] for k, articles in magazine.items(): - if not (k.startswith("magazine") and articles): + if not (k.startswith('magazine') and articles): continue try: for article in articles: self.log.debug(f'Found article: {article["title"]}') feed_articles.append( { - "url": self._article_endpoint(article["nid"]), - "title": article["title"].replace("\n", " "), - "description": article.get("deck", ""), - "date": article["publishedAt"], - "section": k[len("magazine") :], + 'url': self._article_endpoint(article['nid']), + 'title': article['title'].replace('\n', ' '), + 'description': article.get('deck', ''), + 'date': article['publishedAt'], + 'section': k[len('magazine') :], } ) except TypeError: @@ -290,24 +290,24 @@ fragment ArticlePageFields on Article { pass sort_sections = [ - "Cover", - "Editorsnote", - "Features", - "StateOfTheNation", - "ResPublica", - "Columns", - "Upfront", - "Backstory", - "SignsAndWonders", - "Usandtheworld", - "Booksandthearts", - "Poetry", - "Exposure", + 'Cover', + 'Editorsnote', + 'Features', + 'StateOfTheNation', + 'ResPublica', + 'Columns', + 'Upfront', + 'Backstory', + 'SignsAndWonders', + 'Usandtheworld', + 'Booksandthearts', + 'Poetry', + 'Exposure', ] sort_category_key = cmp_to_key(lambda a, b: sort_section(a, b, sort_sections)) return [ ( - magazine["metaData"]["issueTag"]["text"], + magazine['metaData']['issueTag']['text'], sorted(feed_articles, key=sort_category_key), ) ] diff --git a/recipes/news24.recipe b/recipes/news24.recipe index 94b9979ba2..a5c4e77d06 100644 --- a/recipes/news24.recipe +++ b/recipes/news24.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1375900744(BasicNewsRecipe): title = u'News24' - description = "News24." + description = 'News24.' 
__author__ = 'Nicki de Wet' publisher = 'Media24' category = 'news, politics, South Africa' @@ -17,10 +17,10 @@ class AdvancedUserRecipe1375900744(BasicNewsRecipe): remove_empty_feeds = True publication_type = 'newsportal' masthead_url = 'http://www.24.com/images/widgethead_news.png' - extra_css = """ + extra_css = ''' body{font-family: Arial,Helvetica,sans-serif } img{display: block} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/news_busters.recipe b/recipes/news_busters.recipe index 7d8ab2d80e..dea3e7ad17 100644 --- a/recipes/news_busters.recipe +++ b/recipes/news_busters.recipe @@ -7,7 +7,7 @@ class NewsBusters(BasicNewsRecipe): __author__ = 'jde' oldest_article = 1 # day max_articles_per_feed = 100 - cover_url = "http://newsbusters.org/sites/all/themes/genesis_nb/images/nb-mrc.png" + cover_url = 'http://newsbusters.org/sites/all/themes/genesis_nb/images/nb-mrc.png' language = 'en' encoding = 'utf8' needs_subscription = False diff --git a/recipes/newslaundry.recipe b/recipes/newslaundry.recipe index 4fe4cf6996..3e317aaff0 100644 --- a/recipes/newslaundry.recipe +++ b/recipes/newslaundry.recipe @@ -14,7 +14,7 @@ class newslaundry(BasicNewsRecipe): encoding = 'utf-8' no_stylesheets = True remove_javascript = True - oldest_article = 7 # days + oldest_article = 7 # days resolve_internal_links = True ignore_duplicate_articles = {'url'} @@ -34,5 +34,5 @@ class newslaundry(BasicNewsRecipe): def print_version(self, url): if 'hindi.newslaundry' in url: - self.abort_article('Skipping hindi article') # remove this line if you want hindi articles. + self.abort_article('Skipping hindi article') # remove this line if you want hindi articles. return url diff --git a/recipes/newsweek_polska.recipe b/recipes/newsweek_polska.recipe index af69641762..4733dfe135 100644 --- a/recipes/newsweek_polska.recipe +++ b/recipes/newsweek_polska.recipe @@ -71,31 +71,31 @@ class Newsweek(BasicNewsRecipe): strong = p.find('strong') if strong: newest = re.compile( - "Tekst pochodzi z najnowszego numeru Tygodnika Newsweek") + r'Tekst pochodzi z najnowszego numeru Tygodnika Newsweek') if newest.search(str(strong)): strong.extract() continue itunes = p.find('a') if itunes: - reurl = re.compile("itunes.apple.com") + reurl = re.compile(r'itunes.apple.com') if reurl.search(str(itunes['href'])): p.extract() continue imagedesc = p.find('div', attrs={'class': 'image-desc'}) if imagedesc: - redesc = re.compile("Okładka numeru") + redesc = re.compile(r'Okładka numeru') if (redesc.search(str(imagedesc))): p.extract() continue # get actual contents for content in article_div.contents: - strs.append("".join(str(content))) + strs.append(''.join(str(content))) # return contents as a string - return u"".join(strs) + return u''.join(strs) # # Articles can be divided into several pages, this method parses them recursevely @@ -108,7 +108,7 @@ class Newsweek(BasicNewsRecipe): matches = re.search(r'
<article>(.*)</article>
', source, re.DOTALL) if matches is None: - print("no article tag found, returning...") + print('no article tag found, returning...') return main_section = BeautifulSoup(matches.group(0)) diff --git a/recipes/newtab.recipe b/recipes/newtab.recipe index 7dc3d84064..9e7a70d335 100644 --- a/recipes/newtab.recipe +++ b/recipes/newtab.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class NewTab(BasicNewsRecipe): title = '\u041D\u043E\u0432\u0430\u044F \u0432\u043A\u043B\u0430\u0434\u043A\u0430' __author__ = 'bugmen00t' - description = '\u041F\u043B\u043E\u0449\u0430\u0434\u043A\u0430 \u0434\u043B\u044F \u043F\u0443\u0431\u043B\u0438\u043A\u0430\u0446\u0438\u0438 \u0440\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0435\u0439 \u0438 \u0437\u0430\u043C\u0435\u0442\u043E\u043A \u043E \u0442\u043E\u043C, \u043A\u0430\u043A \u043C\u0435\u043D\u044F\u044E\u0442\u0441\u044F \u0440\u0435\u0433\u0438\u043E\u043D\u044B \u0420\u043E\u0441\u0441\u0438\u0438 \u043F\u043E\u0441\u043B\u0435 24 \u0444\u0435\u0432\u0440\u0430\u043B\u044F 2022 \u0433\u043E\u0434\u0430.' # noqa + description = '\u041F\u043B\u043E\u0449\u0430\u0434\u043A\u0430 \u0434\u043B\u044F \u043F\u0443\u0431\u043B\u0438\u043A\u0430\u0446\u0438\u0438 \u0440\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0435\u0439 \u0438 \u0437\u0430\u043C\u0435\u0442\u043E\u043A \u043E \u0442\u043E\u043C, \u043A\u0430\u043A \u043C\u0435\u043D\u044F\u044E\u0442\u0441\u044F \u0440\u0435\u0433\u0438\u043E\u043D\u044B \u0420\u043E\u0441\u0441\u0438\u0438 \u043F\u043E\u0441\u043B\u0435 24 \u0444\u0435\u0432\u0440\u0430\u043B\u044F 2022 \u0433\u043E\u0434\u0430.' # noqa: E501 publisher = '\u041C\u0438\u0445\u0430\u0438\u043B \u0414\u0430\u043D\u0438\u043B\u043E\u0432\u0438\u0447' category = 'blog' cover_url = u'https://static.tildacdn.com/tild3765-3961-4337-b366-323437626331/thenewtab-sn.jpg' diff --git a/recipes/newtimes.recipe b/recipes/newtimes.recipe index aeb7f082fa..90c3a70bf9 100644 --- a/recipes/newtimes.recipe +++ b/recipes/newtimes.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class NewTimes(BasicNewsRecipe): title = 'The New Times' __author__ = 'bugmen00t' - description = ' \u0415\u0436\u0435\u043D\u0435\u0434\u0435\u043B\u044C\u043D\u044B\u0439 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u043E-\u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0439 \u0436\u0443\u0440\u043D\u0430\u043B' # noqa + description = ' \u0415\u0436\u0435\u043D\u0435\u0434\u0435\u043B\u044C\u043D\u044B\u0439 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u043E-\u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0439 \u0436\u0443\u0440\u043D\u0430\u043B' # noqa: E501 publisher = 'The New Times' category = 'newspaper' cover_url = u'https://newtimes.ru/img/ogimage.png' diff --git a/recipes/nezavisne_novine.recipe b/recipes/nezavisne_novine.recipe index 9ab93d928d..11b8da10af 100644 --- a/recipes/nezavisne_novine.recipe +++ b/recipes/nezavisne_novine.recipe @@ -24,10 +24,10 @@ class NezavisneNovine(BasicNewsRecipe): cover_url = strftime( 'http://pdf.nezavisne.com/slika/novina/nezavisne_novine.jpg?v=%Y%m%d') masthead_url = 'http://www.nezavisne.com/slika/osnova/nezavisne-novine-logo.gif' - extra_css = """ + extra_css = ''' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em; display:block} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/nhk_news.recipe 
b/recipes/nhk_news.recipe index b9dcc0ebd3..6582cbc28f 100644 --- a/recipes/nhk_news.recipe +++ b/recipes/nhk_news.recipe @@ -36,4 +36,3 @@ class ReutersJa(BasicNewsRecipe): for img in soup.findAll('img', attrs={'data-src':True}): img['src'] = img['data-src'] return soup - diff --git a/recipes/nikkei_news.recipe b/recipes/nikkei_news.recipe index e5f7fd204d..7c4f02cfb6 100644 --- a/recipes/nikkei_news.recipe +++ b/recipes/nikkei_news.recipe @@ -14,7 +14,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe class NikkeiNet_paper_subscription(BasicNewsRecipe): title = u'\u65E5\u672C\u7D4C\u6E08\u65B0\u805E\uFF08\u671D\u520A\u30FB\u5915\u520A\uFF09' __author__ = 'Ado Nishimura' - description = u'\u65E5\u7D4C\u96FB\u5B50\u7248\u306B\u3088\u308B\u65E5\u672C\u7D4C\u6E08\u65B0\u805E\u3002\u671D\u520A\u30FB\u5915\u520A\u306F\u53D6\u5F97\u6642\u9593\u306B\u3088\u308A\u5207\u308A\u66FF\u308F\u308A\u307E\u3059\u3002\u8981\u8CFC\u8AAD' # noqa + description = u'\u65E5\u7D4C\u96FB\u5B50\u7248\u306B\u3088\u308B\u65E5\u672C\u7D4C\u6E08\u65B0\u805E\u3002\u671D\u520A\u30FB\u5915\u520A\u306F\u53D6\u5F97\u6642\u9593\u306B\u3088\u308A\u5207\u308A\u66FF\u308F\u308A\u307E\u3059\u3002\u8981\u8CFC\u8AAD' # noqa: E501 needs_subscription = True oldest_article = 1 max_articles_per_feed = 30 @@ -26,67 +26,67 @@ class NikkeiNet_paper_subscription(BasicNewsRecipe): masthead_url = 'http://cdn.nikkei.co.jp/parts/ds/images/common/st_nikkei_r1_20101003_1.gif' cover_margins = (10, 188, '#ffffff') - remove_tags_before = {'class': "cmn-indent"} + remove_tags_before = {'class': 'cmn-indent'} remove_tags = [ # {'class':"cmn-article_move"}, # {'class':"cmn-pr_list"}, # {'class':"cmnc-zoom"}, - {'class': "cmn-hide"}, + {'class': 'cmn-hide'}, {'name': 'form'}, {'class': 'cmn-print_headline cmn-clearfix'}, {'id': 'ABOUT_NIKKEI'}, ] - remove_tags_after = {'class': "cmn-indent"} + remove_tags_after = {'class': 'cmn-indent'} def get_browser(self): br = BasicNewsRecipe.get_browser(self) if self.username is not None and self.password is not None: - print("-------------------------open top page-------------------------------------") + print('-------------------------open top page-------------------------------------') br.open('http://www.nikkei.com/') - print("-------------------------open first login form-----------------------------") + print('-------------------------open first login form-----------------------------') try: url = list(br.links( - url_regex="www.nikkei.com/etc/accounts/login"))[0].url + url_regex='www.nikkei.com/etc/accounts/login'))[0].url except IndexError: - print("Found IndexError") + print('Found IndexError') url = 'http://www.nikkei.com/etc/accounts/login?dps=3&pageflag=top&url=http%3A%2F%2Fwww.nikkei.com%2F' except StopIteration: url = 'http://www.nikkei.com/etc/accounts/login?dps=3&pageflag=top&url=http%3A%2F%2Fwww.nikkei.com%2F' br.open(url) - print("-------------------------JS redirect(send autoPostForm)--------------------") + print('-------------------------JS redirect(send autoPostForm)--------------------') br.select_form(name='autoPostForm') br.submit() # response = br.response() - print("-------------------------got login form------------------------------------") + print('-------------------------got login form------------------------------------') br.select_form(name='LA7010Form01') br['LA7010Form01:LA7010Email'] = self.username br['LA7010Form01:LA7010Password'] = self.password br.submit(id='LA7010Form01:submitBtn') - print("-------------------------JS 
redirect---------------------------------------") + print('-------------------------JS redirect---------------------------------------') br.select_form(nr=0) br.submit() return br def cleanup(self): - print("-------------------------logout--------------------------------------------") + print('-------------------------logout--------------------------------------------') self.browser.open('https://regist.nikkei.com/ds/etc/accounts/logout') def parse_index(self): - print("-------------------------get index of paper--------------------------------") + print('-------------------------get index of paper--------------------------------') result = [] soup = self.index_to_soup('http://www.nikkei.com/paper/') - sections = soup.findAll(attrs={'class': re.compile(".*cmn-article_title.*")}) + sections = soup.findAll(attrs={'class': re.compile(r'.*cmn-article_title.*')}) for sect in sections: - sect_title = sect.find(attrs={'class' : re.compile(".*cmnc-((large)|(middle)|(small)).*")}) + sect_title = sect.find(attrs={'class': re.compile(r'.*cmnc-((large)|(middle)|(small)).*')}) if sect_title is None: continue sect_title = sect_title.contents[0] sect_result = [] url = sect.a['href'] - url = re.sub("/article/", "/print-article/", url) + url = url.replace('/article/', '/print-article/') url = 'http://www.nikkei.com' + url sect_result.append(dict(title=sect_title, url=url, date='',description='', content='')) result.append([sect_title, sect_result]) @@ -95,11 +95,11 @@ class NikkeiNet_paper_subscription(BasicNewsRecipe): def populate_article_metadata(self, article, soup, first): try: elms = soup.findAll( - 'div', {"class": "cmn-article_text JSID_key_fonttxt"}) + 'div', {'class': 'cmn-article_text JSID_key_fonttxt'}) elm_text = u'◆'.join( [self.tag_to_string(elm).strip() for elm in elms]) elm_text = unicodedata.normalize('NFKC', elm_text) article.summary = article.text_summary = elm_text except: - self.log("Error: Failed to get article summary.") + self.log('Error: Failed to get article summary.') return diff --git a/recipes/nikkeiasia.recipe b/recipes/nikkeiasia.recipe index c9c55c88b4..5a725bb808 100644 --- a/recipes/nikkeiasia.recipe +++ b/recipes/nikkeiasia.recipe @@ -29,12 +29,12 @@ class Nikkei(BasicNewsRecipe): encoding = 'utf-8' use_embedded_content = False - extra_css = """ + extra_css = ''' .subhead { font-style:italic; color:#202020; } em, blockquote { color:#202020; } .sec, .byline { font-size:small; font-weight:bold; } .article__image, .article__caption { font-size:small; text-align:center; } - """ + ''' recipe_specific_options = { 'date': {'short': 'The edition date (YYYY-MM-DD format)', 'long': '2024-09-19'} diff --git a/recipes/njuz_net.recipe b/recipes/njuz_net.recipe index 7db58348d5..4977ea42a4 100644 --- a/recipes/njuz_net.recipe +++ b/recipes/njuz_net.recipe @@ -23,18 +23,18 @@ class NjuzNet(BasicNewsRecipe): language = 'sr' publication_type = 'newsportal' masthead_url = 'http://www.njuz.net/njuznet.jpg' - extra_css = """ + extra_css = ''' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .articledescription{font-family: serif1, serif} .wp-caption-text{font-size: x-small} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language } - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] # noqa: RUF039 keep_only_tags = [ dict(attrs={'id': 'entryMeta'}), 
dict(attrs={'class': 'post'}) diff --git a/recipes/nme.recipe b/recipes/nme.recipe index 80e8481251..ada5fb066e 100644 --- a/recipes/nme.recipe +++ b/recipes/nme.recipe @@ -27,7 +27,6 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe): simultaneous_downloads = 20 use_embedded_content = False recursions = 0 - auto_cleanup = True feeds = [ (u'NME News', u'http://www.nme.com/news/feed'), diff --git a/recipes/nordjyske_dk.recipe b/recipes/nordjyske_dk.recipe index 76a71e9454..8d9132941e 100644 --- a/recipes/nordjyske_dk.recipe +++ b/recipes/nordjyske_dk.recipe @@ -38,4 +38,3 @@ class Nordjyske_dk(BasicNewsRecipe): ('Aalborg', 'http://nordjyske.dk/rss/aalborg'), ] - diff --git a/recipes/novaya_gazeta_europe.recipe b/recipes/novaya_gazeta_europe.recipe index 9e7b4df57c..8a32e21b2e 100644 --- a/recipes/novaya_gazeta_europe.recipe +++ b/recipes/novaya_gazeta_europe.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class NovayaGazetaEurope(BasicNewsRecipe): title = u'\u041D\u043E\u0432\u0430\u044F \u0413\u0430\u0437\u0435\u0442\u0430. \u0415\u0432\u0440\u043E\u043F\u0430' __author__ = 'bugmen00t' - description = u'\u0413\u043E\u0432\u043E\u0440\u0438\u043C \u043A\u0430\u043A \u0435\u0441\u0442\u044C. \u041F\u0438\u0448\u0435\u043C \u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u044F\u0449\u0435\u043C \u0432 \u0420\u043E\u0441\u0441\u0438\u0438, \u0423\u043A\u0440\u0430\u0438\u043D\u0435 \u0438 \u0415\u0432\u0440\u043E\u043F\u0435. \u041D\u043E\u0432\u043E\u0441\u0442\u0438, \u0430\u043D\u0430\u043B\u0438\u0442\u0438\u043A\u0430, \u043C\u043D\u0435\u043D\u0438\u044F \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u043E\u0432, \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0440\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0438 \u0438 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0441\u043A\u0438\u0435 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F.' # noqa + description = u'\u0413\u043E\u0432\u043E\u0440\u0438\u043C \u043A\u0430\u043A \u0435\u0441\u0442\u044C. \u041F\u0438\u0448\u0435\u043C \u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u044F\u0449\u0435\u043C \u0432 \u0420\u043E\u0441\u0441\u0438\u0438, \u0423\u043A\u0440\u0430\u0438\u043D\u0435 \u0438 \u0415\u0432\u0440\u043E\u043F\u0435. \u041D\u043E\u0432\u043E\u0441\u0442\u0438, \u0430\u043D\u0430\u043B\u0438\u0442\u0438\u043A\u0430, \u043C\u043D\u0435\u043D\u0438\u044F \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u043E\u0432, \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0440\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0438 \u0438 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0441\u043A\u0438\u0435 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F.' 
# noqa: E501 publisher = '\u041A\u0438\u0440\u0438\u043B\u043B \u041C\u0430\u0440\u0442\u044B\u043D\u043E\u0432' publication_type = 'newspaper' category = 'news' diff --git a/recipes/novilist_novine_hr.recipe b/recipes/novilist_novine_hr.recipe index 6939447607..61e6f78e71 100644 --- a/recipes/novilist_novine_hr.recipe +++ b/recipes/novilist_novine_hr.recipe @@ -27,16 +27,16 @@ class NoviList_hr(BasicNewsRecipe): needs_subscription = True masthead_url = 'http://novine.novilist.hr/images/system/novilist-logo.jpg' index = 'http://novine.novilist.hr/' - extra_css = """ + extra_css = ''' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Geneva,Arial,Helvetica,Swiss,sans1,sans-serif } img{display:block; margin-bottom: 0.4em; margin-top: 0.4em} .nadnaslov,.podnaslov{font-size: small; display: block; margin-bottom: 1em} .naslov{font-size: x-large; color: maroon; font-weight: bold; display: block; margin-bottom: 1em;} p{display: block} - """ + ''' - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] # noqa: RUF039 conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True diff --git a/recipes/novinite_bg.recipe b/recipes/novinite_bg.recipe index 0bf794da60..b3db41c18e 100644 --- a/recipes/novinite_bg.recipe +++ b/recipes/novinite_bg.recipe @@ -13,8 +13,10 @@ class AdvancedUserRecipe1329123365(BasicNewsRecipe): no_stylesheets = True remove_javascript = True keep_only_tags = [dict(name='div', attrs={'id': 'content'})] - remove_tags = [dict(name='div', attrs={'id': 'text_options'})] - remove_tags = [dict(name='div', attrs={'id': 'social_shares_top'})] + remove_tags = [ + dict(name='div', attrs={'id': 'text_options'}), + dict(name='div', attrs={'id': 'social_shares_top'}), + ] remove_tags_after = dict(id='textsize') feeds = [(u'Business', u'http://novinite.bg/rss.php?category_id=1'), (u'Politics', u'http://novinite.bg/rss.php?category_id=2'), diff --git a/recipes/novinky.cz.recipe b/recipes/novinky.cz.recipe index c30dc259d7..e131e798e8 100644 --- a/recipes/novinky.cz.recipe +++ b/recipes/novinky.cz.recipe @@ -24,12 +24,12 @@ class novinkyRecipe(BasicNewsRecipe): remove_javascript = True no_stylesheets = True - remove_tags = [dict(name='div', attrs={'id': ['pictureInnerBox']}), - dict(name='div', attrs={'id': ['discussionEntry']}), - dict(name='span', attrs={ + remove_tags = [dict(name='div', attrs={'id': ['pictureInnerBox']}), + dict(name='div', attrs={'id': ['discussionEntry']}), + dict(name='span', attrs={ 'id': ['mynews-hits', 'mynews-author']}), - dict(name='div', attrs={'class': ['related']}), - dict(name='div', attrs={'id': ['multimediaInfo']})] + dict(name='div', attrs={'class': ['related']}), + dict(name='div', attrs={'id': ['multimediaInfo']})] remove_tags_before = dict(name='div', attrs={'class': ['articleHeader']}) remove_tags_after = dict(name='div', attrs={'class': 'related'}) diff --git a/recipes/novosti.recipe b/recipes/novosti.recipe index 7a4d19f4d2..442b2b4c5c 100644 --- a/recipes/novosti.recipe +++ b/recipes/novosti.recipe @@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Novosti(BasicNewsRecipe): title = 'Vecernje Novosti' __author__ = 'Darko Miletic' - description = 'U početku su bile istinske večernje novine - pokrenute u vreme Tršćanske krize, Italijansko-jugoslovenskog konflikta oko grada Trsta - ali su brzo izrasle u dnevni 
informativno-politički list, koji već godinama ima najveći tiraž u Srbiji.' # noqa + description = 'U početku su bile istinske večernje novine - pokrenute u vreme Tršćanske krize, Italijansko-jugoslovenskog konflikta oko grada Trsta - ali su brzo izrasle u dnevni informativno-politički list, koji već godinama ima najveći tiraž u Srbiji.' # noqa: E501 publisher = 'Kompanija Novosti' category = 'news, politics, Serbia' oldest_article = 2 @@ -24,18 +24,18 @@ class Novosti(BasicNewsRecipe): language = 'sr' publication_type = 'newspaper' masthead_url = 'http://www.novosti.rs/images/basic/logo-print.png' - extra_css = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} + extra_css = ''' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif} .author{font-size: small} .articleLead{font-size: large; font-weight: bold} img{display: block; margin-bottom: 1em; margin-top: 1em} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'pretty_print': True } - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] # noqa: RUF039 keep_only_tags = [dict(attrs={'class': [ 'articleTitle', 'articleInfo', 'articleLead', 'singlePhoto fl', 'articleBody']})] diff --git a/recipes/nrc.nl.recipe b/recipes/nrc.nl.recipe index a03fb6269e..4f5b29fffc 100644 --- a/recipes/nrc.nl.recipe +++ b/recipes/nrc.nl.recipe @@ -39,7 +39,7 @@ class NRC(BasicNewsRecipe): ), dict(name=['script', 'noscript', 'style']), ] - remove_attributes = ["class", "id", "name", "style"] + remove_attributes = ['class', 'id', 'name', 'style'] encoding = 'utf-8' no_stylesheets = True ignore_duplicate_articles = {'url'} @@ -52,8 +52,8 @@ class NRC(BasicNewsRecipe): title_regexp = None @staticmethod - def _monthly_list_url(date, fmt="%Y/%m/"): - return "https://www.nrc.nl/de/data/NH/" + date.strftime(fmt) + def _monthly_list_url(date, fmt='%Y/%m/'): + return 'https://www.nrc.nl/de/data/NH/' + date.strftime(fmt) def _clean_article_title(self, title): if not title: @@ -62,7 +62,7 @@ class NRC(BasicNewsRecipe): self.title_regexp = re.compile( r'([^<]+)\s*' ) - return self.title_regexp.sub(r"\1 ", title) + return self.title_regexp.sub(r'\1 ', title) def parse_index(self): sections = [] @@ -88,43 +88,43 @@ class NRC(BasicNewsRecipe): issues = json.loads(r.read()) if len(issues) > 0: issue_date = datetime.datetime.strptime( - issues[0]["published_at"], "%Y-%m-%dT%H:%M:%SZ" + issues[0]['published_at'], '%Y-%m-%dT%H:%M:%SZ' ) - issue_url = self._monthly_list_url(issue_date, "%Y/%m/%d/") - self.frontpage = issues[0]["frontpage"] + issue_url = self._monthly_list_url(issue_date, '%Y/%m/%d/') + self.frontpage = issues[0]['frontpage'] break if issue_url is None: return [] with closing(self.browser.open(Request(issue_url, None, headers))) as r: edition = json.loads(r.read()) documents = {} - for headline in edition["paperheadlines"]: - item = headline["item"] - documents[headline["document_id"]] = dict( - url=item["full_url"], - headline=self._clean_article_title(item["headline"]) + for headline in edition['paperheadlines']: + item = headline['item'] + documents[headline['document_id']] = dict( + url=item['full_url'], + headline=self._clean_article_title(item['headline']) ) - for section in edition["sections"]: + for section in edition['sections']: articles = [] - for doc in 
section["document_ids"]: + for doc in section['document_ids']: if doc not in documents: self.log.warn('Document not found:', doc) continue articles.append( dict( - title=documents[doc]["headline"], url=documents[doc]["url"] + title=documents[doc]['headline'], url=documents[doc]['url'] ) ) - sections.append((section["name"], articles)) + sections.append((section['name'], articles)) return sections def preprocess_html(self, soup): for tag in soup(): if tag.name == 'img': if tag.has_attr('data-src-medium'): - tag['src'] = tag['data-src-medium'].split("|")[0] + tag['src'] = tag['data-src-medium'].split('|')[0] elif tag.has_attr('data-src'): - tag['src'] = tag['data-src'].split("|")[0] + tag['src'] = tag['data-src'].split('|')[0] if tag['src'].startswith('//'): tag['src'] = 'https:' + tag['src'] elif tag['src'].startswith('/'): diff --git a/recipes/nrc_next.recipe b/recipes/nrc_next.recipe index 42886190d2..237feaac3c 100644 --- a/recipes/nrc_next.recipe +++ b/recipes/nrc_next.recipe @@ -45,8 +45,7 @@ class NRCNext(BasicNewsRecipe): raise ValueError('Failed to login, check username and password') epubraw = None for today in (date.today(), date.today() - timedelta(days=1),): - url = 'http://digitaleeditie.nrc.nl/digitaleeditie/helekrant/epub/nn_%s.epub' \ - % today.strftime('%Y%m%d') + url = 'http://digitaleeditie.nrc.nl/digitaleeditie/helekrant/epub/nn_%s.epub' % today.strftime('%Y%m%d') self.log('Trying to download epub from:', url) try: response3 = br.open(url, timeout=60) @@ -62,7 +61,7 @@ class NRCNext(BasicNewsRecipe): zfile = zipfile.ZipFile(BytesIO(epubraw), 'r') zfile.extractall(self.output_dir) namelist = zfile.namelist() - emre = re.compile("<em(?:.*)>(.*)</em>") + emre = re.compile(r'<em(?:.*)>(.*)</em>') subst = '\\1' for name in namelist: _, ext = os.path.splitext(name) diff --git a/recipes/nspm.recipe b/recipes/nspm.recipe index 2b89b3a604..a05107c411 100644 --- a/recipes/nspm.recipe +++ b/recipes/nspm.recipe @@ -33,19 +33,19 @@ class Nspm(BasicNewsRecipe): remove_empty_feeds = True publication_type = 'magazine' masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg' - extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} + extra_css = ''' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Times New Roman", serif1, serif} .article_description{font-family: Arial, sans1, sans-serif} img{margin-top:0.5em; margin-bottom: 0.7em; display: block} .author{color: #990000; font-weight: bold} - .author,.createdate{font-size: 0.9em} """ + .author,.createdate{font-size: 0.9em} ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'pretty_print': True } - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] # noqa: RUF039 remove_tags = [dict(name=['link', 'script', 'meta', 'base', 'img'])] remove_attributes = ['width', 'height', 'lang', 'xmlns:fb', 'xmlns:og', 'vspace', 'hspace', 'type', 'start', 'size'] diff --git a/recipes/nspm_int.recipe b/recipes/nspm_int.recipe index a562a05628..9309c88bfb 100644 --- a/recipes/nspm_int.recipe +++ b/recipes/nspm_int.recipe @@ -22,12 +22,12 @@ class Nspm_int(BasicNewsRecipe): delay = 2 publication_type = 'magazine' masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg' - extra_css = """ + 
extra_css = ''' body{font-family: "Times New Roman", serif} .article_description{font-family: Arial, sans-serif} img{margin-top:0.5em; margin-bottom: 0.7em} .author{color: #990000; font-weight: bold} - .author,.createdate{font-size: 0.9em} """ + .author,.createdate{font-size: 0.9em} ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True diff --git a/recipes/nu.recipe b/recipes/nu.recipe index 80dd9a19f0..a40d392304 100644 --- a/recipes/nu.recipe +++ b/recipes/nu.recipe @@ -49,4 +49,4 @@ class Nu(BasicNewsRecipe): (u'Podcast Algemeen nieuws', u'http://www.nu.nl/podcast.php')] def get_article_url(self, article): - return article.get('guid', None) + return article.get('guid', None) diff --git a/recipes/nv_ru.recipe b/recipes/nv_ru.recipe index c651b1ec09..bc069cc032 100644 --- a/recipes/nv_ru.recipe +++ b/recipes/nv_ru.recipe @@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class NV(BasicNewsRecipe): title = '\u041D\u0412 (\u00AB\u041D\u043E\u0432\u043E\u0435 \u0432\u0440\u0435\u043C\u044F\u00BB)' __author__ = 'bugmen00t' - description = '\u0415\u0436\u0435\u043D\u0435\u0434\u0435\u043B\u044C\u043D\u044B\u0439 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u043E-\u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0439 \u0438 \u0438\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u043E\u043D\u043D\u043E-\u043D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0440\u0435\u0441\u0443\u0440\u0441.' # noqa - publisher = '\u041E\u041E\u041E \u00AB\u0418\u0437\u0434\u0430\u0442\u0435\u043B\u044C\u0441\u043A\u0438\u0439 \u0434\u043E\u043C \u00AB\u041C\u0435\u0434\u0438\u0430-\u0414\u041A\u00BB' # noqa + description = '\u0415\u0436\u0435\u043D\u0435\u0434\u0435\u043B\u044C\u043D\u044B\u0439 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u043E-\u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0439 \u0438 \u0438\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u043E\u043D\u043D\u043E-\u043D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0439 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0440\u0435\u0441\u0443\u0440\u0441.' # noqa: E501 + publisher = '\u041E\u041E\u041E \u00AB\u0418\u0437\u0434\u0430\u0442\u0435\u043B\u044C\u0441\u043A\u0438\u0439 \u0434\u043E\u043C \u00AB\u041C\u0435\u0434\u0438\u0430-\u0414\u041A\u00BB' # noqa: E501 category = 'magazine' cover_url = u'https://yes-ukraine.org/imglib/_newimage/Yalta-annual-meeting/11th-yalta-annual-meeting/partners/media-partneri/nv_logo.png' language = 'ru_UK' diff --git a/recipes/nv_ua.recipe b/recipes/nv_ua.recipe index cb7ced20a6..205bf63336 100644 --- a/recipes/nv_ua.recipe +++ b/recipes/nv_ua.recipe @@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class NV(BasicNewsRecipe): title = '\u041D\u0412 (\u00AB\u041D\u043E\u0432\u0438\u0439 \u0427\u0430\u0441\u00BB)' __author__ = 'bugmen00t' - description = '\u0429\u043E\u0442\u0438\u0436\u043D\u0435\u0432\u0438\u0439 \u0441\u0443\u0441\u043F\u0456\u043B\u044C\u043D\u043E-\u043F\u043E\u043B\u0456\u0442\u0438\u0447\u043D\u0438\u0439 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u043E-\u043D\u043E\u0432\u0438\u043D\u043D\u0438\u0439 \u0441\u0430\u0439\u0442.' 
# noqa - publisher = '\u0422\u041E\u0412 \u00AB\u0412\u0438\u0434\u0430\u0432\u043D\u0438\u0447\u0438\u0439 \u0434\u0456\u043C \u00AB\u041C\u0435\u0434\u0456\u0430-\u0414\u041A\u00BB' # noqa + description = '\u0429\u043E\u0442\u0438\u0436\u043D\u0435\u0432\u0438\u0439 \u0441\u0443\u0441\u043F\u0456\u043B\u044C\u043D\u043E-\u043F\u043E\u043B\u0456\u0442\u0438\u0447\u043D\u0438\u0439 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u043E-\u043D\u043E\u0432\u0438\u043D\u043D\u0438\u0439 \u0441\u0430\u0439\u0442.' # noqa: E501 + publisher = '\u0422\u041E\u0412 \u00AB\u0412\u0438\u0434\u0430\u0432\u043D\u0438\u0447\u0438\u0439 \u0434\u0456\u043C \u00AB\u041C\u0435\u0434\u0456\u0430-\u0414\u041A\u00BB' # noqa: E501 category = 'magazine' cover_url = u'https://static.nv.ua/shared/system/Article/posters/002/467/576/600x300/272ab80cd01e4022dcb4c691c9073e97.jpg' language = 'uk' diff --git a/recipes/nyt_magazine.recipe b/recipes/nyt_magazine.recipe index 52c4a38b0d..e548976fcc 100644 --- a/recipes/nyt_magazine.recipe +++ b/recipes/nyt_magazine.recipe @@ -66,7 +66,7 @@ class NytMag(BasicNewsRecipe): if c.lower() == 'yes': self.compress_news_images = True - extra_css = """ + extra_css = ''' .byl, .time { font-size:small; color:#202020; } .cap { font-size:small; text-align:center; } .cred { font-style:italic; font-size:small; } @@ -74,7 +74,7 @@ class NytMag(BasicNewsRecipe): .sc { font-variant: small-caps; } .lbl { font-size:small; color:#404040; } img { display:block; margin:0 auto; } - """ + ''' @property def nyt_parser(self): diff --git a/recipes/nyt_tmag.recipe b/recipes/nyt_tmag.recipe index b4b0a25a18..718a37adcb 100644 --- a/recipes/nyt_tmag.recipe +++ b/recipes/nyt_tmag.recipe @@ -66,7 +66,7 @@ class NytMag(BasicNewsRecipe): if c.lower() == 'yes': self.compress_news_images = True - extra_css = """ + extra_css = ''' .byl, .time { font-size:small; color:#202020; } .cap { font-size:small; text-align:center; } .cred { font-style:italic; font-size:small; } @@ -74,7 +74,7 @@ class NytMag(BasicNewsRecipe): .sc { font-variant: small-caps; } .lbl { font-size:small; color:#404040; } img { display:block; margin:0 auto; } - """ + ''' @property def nyt_parser(self): diff --git a/recipes/nytfeeds.recipe b/recipes/nytfeeds.recipe index bf05758c1e..136e707ec1 100644 --- a/recipes/nytfeeds.recipe +++ b/recipes/nytfeeds.recipe @@ -92,7 +92,7 @@ class NytFeeds(BasicNewsRecipe): if c.lower() == 'yes': self.compress_news_images = True - extra_css = """ + extra_css = ''' .byl, .time { font-size:small; color:#202020; } .cap { font-size:small; text-align:center; } .cred { font-style:italic; font-size:small; } @@ -100,7 +100,7 @@ class NytFeeds(BasicNewsRecipe): .sc { font-variant: small-caps; } .lbl { font-size:small; color:#404040; } img { display:block; margin:0 auto; } - """ + ''' @property def nyt_parser(self): diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe index 8e6013895b..7d81bb0d15 100644 --- a/recipes/nytimes.recipe +++ b/recipes/nytimes.recipe @@ -7,7 +7,6 @@ from __future__ import absolute_import, division, print_function, unicode_litera import datetime import json import re -from pprint import pprint # noqa from calibre import strftime from calibre.ebooks.BeautifulSoup import Tag @@ -88,7 +87,7 @@ class NewYorkTimes(BasicNewsRecipe): is_web_edition = True oldest_web_edition_article = 7 # days - extra_css = """ + extra_css = ''' .byl, .time { font-size:small; color:#202020; } .cap { font-size:small; text-align:center; } .cred { font-style:italic; font-size:small; } @@ -96,7 
+95,7 @@ class NewYorkTimes(BasicNewsRecipe): .sc { font-variant: small-caps; } .lbl { font-size:small; color:#404040; } img { display:block; margin:0 auto; } - """ + ''' @property def nyt_parser(self): diff --git a/recipes/nytimes_sports.recipe b/recipes/nytimes_sports.recipe index 8086ca729a..f07b5f285e 100644 --- a/recipes/nytimes_sports.recipe +++ b/recipes/nytimes_sports.recipe @@ -6,7 +6,7 @@ from __future__ import with_statement __license__ = 'GPL 3' __copyright__ = 'zotzo' __docformat__ = 'restructuredtext en' -""" +''' http://fifthdown.blogs.nytimes.com/ http://offthedribble.blogs.nytimes.com/ http://thequad.blogs.nytimes.com/ @@ -16,7 +16,7 @@ http://bats.blogs.nytimes.com/ http://straightsets.blogs.nytimes.com/ http://formulaone.blogs.nytimes.com/ http://onpar.blogs.nytimes.com/ -""" +''' from calibre.web.feeds.news import BasicNewsRecipe @@ -32,7 +32,6 @@ class NYTimesSports(BasicNewsRecipe): max_articles_per_feed = 25 use_embedded_content = False no_stylesheets = True - language = 'en' delay = 1 feeds = [ diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index ba28033827..ef0dd2157d 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -7,7 +7,6 @@ from __future__ import absolute_import, division, print_function, unicode_litera import datetime import json import re -from pprint import pprint # noqa from calibre import strftime from calibre.ebooks.BeautifulSoup import Tag @@ -88,7 +87,7 @@ class NewYorkTimes(BasicNewsRecipe): is_web_edition = False oldest_web_edition_article = 7 # days - extra_css = """ + extra_css = ''' .byl, .time { font-size:small; color:#202020; } .cap { font-size:small; text-align:center; } .cred { font-style:italic; font-size:small; } @@ -96,7 +95,7 @@ class NewYorkTimes(BasicNewsRecipe): .sc { font-variant: small-caps; } .lbl { font-size:small; color:#404040; } img { display:block; margin:0 auto; } - """ + ''' @property def nyt_parser(self): diff --git a/recipes/nytimes_tech.recipe b/recipes/nytimes_tech.recipe index a7d9009f06..5d76fcd862 100644 --- a/recipes/nytimes_tech.recipe +++ b/recipes/nytimes_tech.recipe @@ -60,7 +60,7 @@ class NytTech(BasicNewsRecipe): if c.lower() == 'yes': self.compress_news_images = True - extra_css = """ + extra_css = ''' .byl, .time { font-size:small; color:#202020; } .cap { font-size:small; text-align:center; } .cred { font-style:italic; font-size:small; } @@ -68,7 +68,7 @@ class NytTech(BasicNewsRecipe): .sc { font-variant: small-caps; } .lbl { font-size:small; color:#404040; } img { display:block; margin:0 auto; } - """ + ''' @property def nyt_parser(self): diff --git a/recipes/nytimesbook.recipe b/recipes/nytimesbook.recipe index fb9e3558c2..df8eddc39a 100644 --- a/recipes/nytimesbook.recipe +++ b/recipes/nytimesbook.recipe @@ -24,7 +24,7 @@ class NewYorkTimesBookReview(BasicNewsRecipe): ignore_duplicate_articles = {'title', 'url'} encoding = 'utf-8' - extra_css = """ + extra_css = ''' .byl, .time { font-size:small; color:#202020; } .cap { font-size:small; text-align:center; } .cred { font-style:italic; font-size:small; } @@ -32,7 +32,7 @@ class NewYorkTimesBookReview(BasicNewsRecipe): .sc { font-variant: small-caps; } .lbl { font-size:small; color:#404040; } img { display:block; margin:0 auto; } - """ + ''' articles_are_obfuscated = use_wayback_machine diff --git a/recipes/nzz_ger.recipe b/recipes/nzz_ger.recipe index 3c7a1c15fd..9f6e4ea0c7 100644 --- a/recipes/nzz_ger.recipe +++ b/recipes/nzz_ger.recipe @@ -97,7 +97,6 @@ class Nzz(BasicNewsRecipe): return url - def 
get_browser(self, *args, **kwargs): kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) diff --git a/recipes/observatorul_cultural.recipe b/recipes/observatorul_cultural.recipe index 4c91f8859e..07aa0d3635 100644 --- a/recipes/observatorul_cultural.recipe +++ b/recipes/observatorul_cultural.recipe @@ -31,8 +31,7 @@ class ObservatorulCultural(BasicNewsRecipe): def parse_index(self): soup = self.index_to_soup( 'http://www.observatorcultural.ro/Arhiva*-archive.html') - issueTag = soup.find('a', href=re.compile( - "observatorcultural.ro\\/Numarul")) + issueTag = soup.find('a', href=re.compile(r'observatorcultural.ro/Numarul')) issueURL = issueTag['href'] print(issueURL) issueSoup = self.index_to_soup(issueURL) diff --git a/recipes/observer_gb.recipe b/recipes/observer_gb.recipe index dc49dc2d23..7a844061de 100644 --- a/recipes/observer_gb.recipe +++ b/recipes/observer_gb.recipe @@ -27,18 +27,18 @@ class Guardian(BasicNewsRecipe): keep_only_tags = [ dict(name='div', attrs={ - 'id': ["content", "article_header", "main-article-info", ]}), + 'id': ['content', 'article_header', 'main-article-info', ]}), ] remove_tags = [ dict(name='div', attrs={ - 'class': ["video-content", "videos-third-column"]}), + 'class': ['video-content', 'videos-third-column']}), dict(name='div', attrs={ - 'id': ["article-toolbox", "subscribe-feeds", ]}), + 'id': ['article-toolbox', 'subscribe-feeds', ]}), dict(name='div', attrs={ - 'class': ["promo-component bookshop-books-promo bookshop-books"]}), - dict(name='ul', attrs={'class': ["pagination"]}), - dict(name='ul', attrs={'id': ["content-actions"]}), - dict(name='li', attrs={'id': ["product-image"]}), + 'class': ['promo-component bookshop-books-promo bookshop-books']}), + dict(name='ul', attrs={'class': ['pagination']}), + dict(name='ul', attrs={'id': ['content-actions']}), + dict(name='li', attrs={'id': ['product-image']}), ] use_embedded_content = False diff --git a/recipes/oc_register.recipe b/recipes/oc_register.recipe index 5e489b404d..d0799cf17b 100644 --- a/recipes/oc_register.recipe +++ b/recipes/oc_register.recipe @@ -36,18 +36,18 @@ class OrangeCountyRegister(BasicNewsRecipe): def parsePage(self, index): if self.debugMessages is True: - print("\n\nStarting " + self.feeds[index][0]) + print('\n\nStarting ' + self.feeds[index][0]) articleList = [] soup = self.index_to_soup(self.feeds[index][1]) # Have this index page now. # look for a.article-title # If any, the description is
- for newsentry in soup.findAll("a", {"class": "article-title"}): + for newsentry in soup.findAll('a', {'class': 'article-title'}): print('Next up:') print(newsentry) - title = newsentry["title"] + title = newsentry['title'] url = newsentry['href'] - print("Title: ") + print('Title: ') print(title) print('URL') print(url) @@ -66,19 +66,19 @@ class OrangeCountyRegister(BasicNewsRecipe): def extract_readable_article(self, html, url): cleanedHTML = super(OrangeCountyRegister, self).extract_readable_article(html, url) - print("Processing html for author") + print('Processing html for author') # Find the attribs... attribDict = self.htmlToAttribsDict(html) - print("dict is type...") + print('dict is type...') print(type(attribDict)) author = attribDict.get('Byline') if author is not None: # add author code after - print("Adding author in meta") + print('Adding author in meta') print(author) cleanedHTML = cleanedHTML.replace( - "", - "\n
\n" + '', + '\n
\n' ) else: print('no author found') @@ -92,7 +92,7 @@ class OrangeCountyRegister(BasicNewsRecipe): def htmlToAttribsDict(self, rawHTML): tokenStart = 'dataLayer.push({' tokenEnd = '});' - print("1") + print('1') startJSON = rawHTML.find(tokenStart) if (startJSON < 0): return @@ -101,13 +101,13 @@ class OrangeCountyRegister(BasicNewsRecipe): if (endJSON < 0): return JSON = JSONBeginning[:endJSON + 1] - JSONQuoted = JSON.replace("'", "\"") + JSONQuoted = JSON.replace("'", '"') try: metadata = json.loads(JSONQuoted) pprint(metadata) return metadata except ValueError: - print("Could not decode JSON:") + print('Could not decode JSON:') print(JSONQuoted) return None @@ -126,7 +126,7 @@ class OrangeCountyRegister(BasicNewsRecipe): # articleList = [] ans = [] feedsCount = len(self.feeds) - for x in range(0, feedsCount - 1): # should be ,4 + for x in range(feedsCount - 1): # should be ,4 feedarticles = self.parsePage(x) if feedarticles is not None: ans.append((self.feeds[x][0], feedarticles)) diff --git a/recipes/old_games.recipe b/recipes/old_games.recipe index 063eb2a086..508eba0a85 100644 --- a/recipes/old_games.recipe +++ b/recipes/old_games.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class OGRU(BasicNewsRecipe): title = u'Old-Games.RU' __author__ = 'bugmen00t' - description = u'Old-Games.RU \u2014 \u043A\u0440\u0443\u043F\u043D\u0435\u0439\u0448\u0438\u0439 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u0439 \u0430\u0440\u0445\u0438\u0432 \u0441\u0442\u0430\u0440\u044B\u0445 \u043A\u043E\u043C\u043F\u044C\u044E\u0442\u0435\u0440\u043D\u044B\u0445 \u0438\u0433\u0440. \u041C\u044B \u043D\u0435 \u0441\u0442\u0430\u0432\u0438\u043C \u043F\u0435\u0440\u0435\u0434 \u0441\u043E\u0431\u043E\u0439 \u0446\u0435\u043B\u0438 \u0441\u043E\u0431\u0440\u0430\u0442\u044C \u0432\u0441\u0435 \u0438\u0433\u0440\u044B, \u0447\u0442\u043E \u0435\u0441\u0442\u044C \u0432 \u043C\u0438\u0440\u0435, \u043D\u043E \u043C\u044B \u0441\u0442\u0430\u0440\u0430\u0435\u043C\u0441\u044F, \u0447\u0442\u043E\u0431\u044B \u043D\u0430 \u0441\u0430\u0439\u0442\u0435 \u0431\u044B\u043B\u043E \u043F\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043B\u0435\u043D\u043E \u0431\u043E\u043B\u044C\u0448\u0438\u043D\u0441\u0442\u0432\u043E \u0448\u0435\u0434\u0435\u0432\u0440\u043E\u0432, \u0440\u0435\u0434\u043A\u043E\u0441\u0442\u0435\u0439 \u0438 \u043F\u0440\u043E\u0441\u0442\u043E \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u044B\u0445 \u043F\u0440\u043E\u0435\u043A\u0442\u043E\u0432 \u043F\u0440\u043E\u0448\u043B\u044B\u0445 \u043B\u0435\u0442. 
\u0421 \u0442\u0435\u0447\u0435\u043D\u0438\u0435\u043C \u0432\u0440\u0435\u043C\u0435\u043D\u0438 \u0433\u0440\u0430\u0444\u0438\u0447\u0435\u0441\u043A\u043E\u0435 \u0438 \u0437\u0432\u0443\u043A\u043E\u0432\u043E\u0435 \u043E\u0444\u043E\u0440\u043C\u043B\u0435\u043D\u0438\u0435 \u0438\u0433\u0440 \u043D\u0430\u0448\u0435\u0433\u043E \u0430\u0440\u0445\u0438\u0432\u0430 \u0437\u0430\u043C\u0435\u0442\u043D\u043E \u0443\u0441\u0442\u0430\u0440\u0435\u043B\u043E, \u043D\u043E \u0438\u0433\u0440\u043E\u0432\u043E\u0439 \u043F\u0440\u043E\u0446\u0435\u0441\u0441 \u043E\u0441\u0442\u0430\u043B\u0441\u044F \u043F\u0440\u0435\u0436\u043D\u0438\u043C, \u0438 \u043F\u043E\u0440\u043E\u0439 \u043E\u043D \u0433\u043E\u0440\u0430\u0437\u0434\u043E \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u0435\u0435, \u0447\u0435\u043C \u0432\u043E \u043C\u043D\u043E\u0433\u0438\u0445 \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0445 \u00AB\u0445\u0438\u0442\u0430\u0445\u00BB.' # noqa + description = u'Old-Games.RU \u2014 \u043A\u0440\u0443\u043F\u043D\u0435\u0439\u0448\u0438\u0439 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u0439 \u0430\u0440\u0445\u0438\u0432 \u0441\u0442\u0430\u0440\u044B\u0445 \u043A\u043E\u043C\u043F\u044C\u044E\u0442\u0435\u0440\u043D\u044B\u0445 \u0438\u0433\u0440. \u041C\u044B \u043D\u0435 \u0441\u0442\u0430\u0432\u0438\u043C \u043F\u0435\u0440\u0435\u0434 \u0441\u043E\u0431\u043E\u0439 \u0446\u0435\u043B\u0438 \u0441\u043E\u0431\u0440\u0430\u0442\u044C \u0432\u0441\u0435 \u0438\u0433\u0440\u044B, \u0447\u0442\u043E \u0435\u0441\u0442\u044C \u0432 \u043C\u0438\u0440\u0435, \u043D\u043E \u043C\u044B \u0441\u0442\u0430\u0440\u0430\u0435\u043C\u0441\u044F, \u0447\u0442\u043E\u0431\u044B \u043D\u0430 \u0441\u0430\u0439\u0442\u0435 \u0431\u044B\u043B\u043E \u043F\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043B\u0435\u043D\u043E \u0431\u043E\u043B\u044C\u0448\u0438\u043D\u0441\u0442\u0432\u043E \u0448\u0435\u0434\u0435\u0432\u0440\u043E\u0432, \u0440\u0435\u0434\u043A\u043E\u0441\u0442\u0435\u0439 \u0438 \u043F\u0440\u043E\u0441\u0442\u043E \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u044B\u0445 \u043F\u0440\u043E\u0435\u043A\u0442\u043E\u0432 \u043F\u0440\u043E\u0448\u043B\u044B\u0445 \u043B\u0435\u0442. \u0421 \u0442\u0435\u0447\u0435\u043D\u0438\u0435\u043C \u0432\u0440\u0435\u043C\u0435\u043D\u0438 \u0433\u0440\u0430\u0444\u0438\u0447\u0435\u0441\u043A\u043E\u0435 \u0438 \u0437\u0432\u0443\u043A\u043E\u0432\u043E\u0435 \u043E\u0444\u043E\u0440\u043C\u043B\u0435\u043D\u0438\u0435 \u0438\u0433\u0440 \u043D\u0430\u0448\u0435\u0433\u043E \u0430\u0440\u0445\u0438\u0432\u0430 \u0437\u0430\u043C\u0435\u0442\u043D\u043E \u0443\u0441\u0442\u0430\u0440\u0435\u043B\u043E, \u043D\u043E \u0438\u0433\u0440\u043E\u0432\u043E\u0439 \u043F\u0440\u043E\u0446\u0435\u0441\u0441 \u043E\u0441\u0442\u0430\u043B\u0441\u044F \u043F\u0440\u0435\u0436\u043D\u0438\u043C, \u0438 \u043F\u043E\u0440\u043E\u0439 \u043E\u043D \u0433\u043E\u0440\u0430\u0437\u0434\u043E \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u0435\u0435, \u0447\u0435\u043C \u0432\u043E \u043C\u043D\u043E\u0433\u0438\u0445 \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0445 \u00AB\u0445\u0438\u0442\u0430\u0445\u00BB.' 
# noqa: E501 publisher = 'Old-Games.RU' publication_type = 'blog' category = 'news, games, retro' @@ -24,7 +24,7 @@ class OGRU(BasicNewsRecipe): remove_attributes = ['style'] - remove_tags = [ + remove_tags = [ dict(name='p', attrs={'id': 'pageDescription'}), dict(name='div', attrs={'class': 'pageNavLinkGroup'}), dict(name='div', attrs={'class': 'tagBlock TagContainer'}), diff --git a/recipes/omgubuntu.recipe b/recipes/omgubuntu.recipe index 9242c5c683..0da184ad37 100644 --- a/recipes/omgubuntu.recipe +++ b/recipes/omgubuntu.recipe @@ -5,8 +5,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class OMGUbuntu(BasicNewsRecipe): - title = u"Omg! Ubuntu!" - description = u"Online news site covering Ubuntu activities. Recipe pulls articles from past 7 days." + title = u'Omg! Ubuntu!' + description = u'Online news site covering Ubuntu activities. Recipe pulls articles from past 7 days.' language = 'en' oldest_article = 7 max_articles_per_feed = 100 diff --git a/recipes/onda_rock.recipe b/recipes/onda_rock.recipe index fc0f0d811d..310de65e4d 100644 --- a/recipes/onda_rock.recipe +++ b/recipes/onda_rock.recipe @@ -23,9 +23,9 @@ class AdvancedUserRecipe1328535130(BasicNewsRecipe): ] no_stylesheets = True feeds = [(u'Onda Rock', u'http://www.ondarock.it/feed.php')] - masthead_url = 'http://api.ning.com/files/4ot8ampp*-rYQuwL2NoaHvVqcyu7VMyWyan12a9QMsJUWxk-q5V1-34wnD-Wj9B5qWjc1yPMLGiwQg8hZJxaySeaG2lx8hpV/2009_banner_ondarock.gif' # noqa + masthead_url = 'http://api.ning.com/files/4ot8ampp*-rYQuwL2NoaHvVqcyu7VMyWyan12a9QMsJUWxk-q5V1-34wnD-Wj9B5qWjc1yPMLGiwQg8hZJxaySeaG2lx8hpV/2009_banner_ondarock.gif' # noqa: E501 - extra_css = ''' # noqa + extra_css = ''' .boxtabscontain_page { border: 1px solid #E0E0E0;clear: both; font-family: "Verdana", "Arial", "Helvetica", sans-serif; diff --git a/recipes/onemagazine.recipe b/recipes/onemagazine.recipe index a5a4d7b610..9306693302 100644 --- a/recipes/onemagazine.recipe +++ b/recipes/onemagazine.recipe @@ -49,7 +49,7 @@ class Onemagazine(BasicNewsRecipe): ] remove_tags = [ - dict(name='p', attrs={'class': ['info']}), dict(name='table', attrs={'class': ['connect_widget_interactive_area']}), dict(name='span', attrs={'class': ['photo']}), dict(name='div', attrs={'class': ['counter']}), dict(name='div', attrs={'class': ['carousel']}), dict(name='div', attrs={'class': ['jcarousel-container jcarousel-container-horizontal']}) # noqa + dict(name='p', attrs={'class': ['info']}), dict(name='table', attrs={'class': ['connect_widget_interactive_area']}), dict(name='span', attrs={'class': ['photo']}), dict(name='div', attrs={'class': ['counter']}), dict(name='div', attrs={'class': ['carousel']}), dict(name='div', attrs={'class': ['jcarousel-container jcarousel-container-horizontal']}) # noqa: E501 ] remove_tags_after = [ diff --git a/recipes/opennet.recipe b/recipes/opennet.recipe index 5d22ca4afa..fa457cdef0 100644 --- a/recipes/opennet.recipe +++ b/recipes/opennet.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class PaperPaper(BasicNewsRecipe): title = 'OpenNet.ru' __author__ = 'bugmen00t' - description = '\u0420\u0443\u0441\u0441\u043A\u043E\u044F\u0437\u044B\u0447\u043D\u044B\u0439 \u043F\u043E\u0440\u0442\u0430\u043B, \u043F\u043E\u0441\u0432\u044F\u0449\u0451\u043D\u043D\u044B\u0439 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u043C \u0438 \u0441\u0432\u043E\u0431\u043E\u0434\u043D\u044B\u043C \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F\u043C (FOSS): \u0435\u0436\u0435\u0434\u043D\u0435\u0432\u043D\u044B\u0435 
\u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0432 \u043E\u0431\u043B\u0430\u0441\u0442\u0438 \u0441\u0432\u043E\u0431\u043E\u0434\u043D\u043E\u0433\u043E \u043F\u0440\u043E\u0433\u0440\u0430\u043C\u043C\u043D\u043E\u0433\u043E \u043E\u0431\u0435\u0441\u043F\u0435\u0447\u0435\u043D\u0438\u044F, Linux, BSD \u0438 UNIX-\u043F\u043E\u0434\u043E\u0431\u043D\u044B\u0445 \u043E\u043F\u0435\u0440\u0430\u0446\u0438\u043E\u043D\u043D\u044B\u0445 \u0441\u0438\u0441\u0442\u0435\u043C.' # noqa + description = '\u0420\u0443\u0441\u0441\u043A\u043E\u044F\u0437\u044B\u0447\u043D\u044B\u0439 \u043F\u043E\u0440\u0442\u0430\u043B, \u043F\u043E\u0441\u0432\u044F\u0449\u0451\u043D\u043D\u044B\u0439 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u043C \u0438 \u0441\u0432\u043E\u0431\u043E\u0434\u043D\u044B\u043C \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F\u043C (FOSS): \u0435\u0436\u0435\u0434\u043D\u0435\u0432\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0432 \u043E\u0431\u043B\u0430\u0441\u0442\u0438 \u0441\u0432\u043E\u0431\u043E\u0434\u043D\u043E\u0433\u043E \u043F\u0440\u043E\u0433\u0440\u0430\u043C\u043C\u043D\u043E\u0433\u043E \u043E\u0431\u0435\u0441\u043F\u0435\u0447\u0435\u043D\u0438\u044F, Linux, BSD \u0438 UNIX-\u043F\u043E\u0434\u043E\u0431\u043D\u044B\u0445 \u043E\u043F\u0435\u0440\u0430\u0446\u0438\u043E\u043D\u043D\u044B\u0445 \u0441\u0438\u0441\u0442\u0435\u043C.' # noqa: E501 publisher = 'Maxim Chirkov' category = 'blog' cover_url = u'https://www.opennet.ru/opennet.gif' diff --git a/recipes/optyczne_pl.recipe b/recipes/optyczne_pl.recipe index c0648ffc13..4dd2834e97 100644 --- a/recipes/optyczne_pl.recipe +++ b/recipes/optyczne_pl.recipe @@ -13,7 +13,6 @@ class OptyczneRecipe(BasicNewsRecipe): description = u'Najlepsze testy obiektywów, testy aparatów cyfrowych i testy lornetek w sieci!' 
cover_url = '' remove_empty_feeds = True - no_stylesheets = True oldest_article = 7 max_articles_per_feed = 100 recursions = 0 diff --git a/recipes/orfonline.recipe b/recipes/orfonline.recipe index 85035f4195..0d162ca4b8 100644 --- a/recipes/orfonline.recipe +++ b/recipes/orfonline.recipe @@ -20,7 +20,6 @@ class ORF(BasicNewsRecipe): masthead_url = 'https://www.orfonline.org/wp-content/uploads/2015/09/Logo_ORF_JPEG.jpg' remove_attributes = ['style', 'height', 'width'] ignore_duplicate_articles = {'url', 'title'} - remove_empty_feeds = True oldest_article = 7 simultaneous_downloads = 1 diff --git a/recipes/orient_21.recipe b/recipes/orient_21.recipe index 6b06434e79..94ac39e363 100644 --- a/recipes/orient_21.recipe +++ b/recipes/orient_21.recipe @@ -32,9 +32,9 @@ class OrientXXIRecipe(BasicNewsRecipe): ''' def default_cover(self, cover_file): - """ + ''' Crée une couverture personnalisée avec le logo - """ + ''' from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data @@ -50,7 +50,7 @@ class OrientXXIRecipe(BasicNewsRecipe): weekday = french_weekday[wkd] month = french_month[today.month] - date_str = f"{weekday} {today.day} {month} {today.year}" + date_str = f'{weekday} {today.day} {month} {today.year}' edition = today.strftime('Édition de %Hh') # Image de base diff --git a/recipes/osvitaua.recipe b/recipes/osvitaua.recipe index 540895f5b4..f1177cb293 100644 --- a/recipes/osvitaua.recipe +++ b/recipes/osvitaua.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Osvita(BasicNewsRecipe): title = '\u041E\u0441\u0432\u0456\u0442\u0430.ua' __author__ = 'bugmen00t' - description = '\u0423\u0441\u0435 \u043F\u0440\u043E \u043E\u0441\u0432\u0456\u0442\u0443 \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456 \u0442\u0430 \u0437\u0430 \u043A\u043E\u0440\u0434\u043E\u043D\u043E\u043C: \u0442\u0435\u043C\u0430\u0442\u0438\u0447\u043D\u0438\u0439 \u0440\u0435\u0441\u0443\u0440\u0441, \u043F\u0440\u0438\u0441\u0432\u044F\u0447\u0435\u043D\u0438\u0439 \u043E\u0441\u0432\u0456\u0442\u0456 \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456 \u0442\u0430 \u0437\u0430 \u0457\u0457 \u043C\u0435\u0436\u0430\u043C\u0438, \u0454 \u0430\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u0438\u043C \u0434\u0436\u0435\u0440\u0435\u043B\u043E\u043C \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0457 \u0443 \u0446\u0456\u0439 \u0441\u0444\u0435\u0440\u0456.' # noqa + description = '\u0423\u0441\u0435 \u043F\u0440\u043E \u043E\u0441\u0432\u0456\u0442\u0443 \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456 \u0442\u0430 \u0437\u0430 \u043A\u043E\u0440\u0434\u043E\u043D\u043E\u043C: \u0442\u0435\u043C\u0430\u0442\u0438\u0447\u043D\u0438\u0439 \u0440\u0435\u0441\u0443\u0440\u0441, \u043F\u0440\u0438\u0441\u0432\u044F\u0447\u0435\u043D\u0438\u0439 \u043E\u0441\u0432\u0456\u0442\u0456 \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456 \u0442\u0430 \u0437\u0430 \u0457\u0457 \u043C\u0435\u0436\u0430\u043C\u0438, \u0454 \u0430\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u0438\u043C \u0434\u0436\u0435\u0440\u0435\u043B\u043E\u043C \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0457 \u0443 \u0446\u0456\u0439 \u0441\u0444\u0435\u0440\u0456.' 
# noqa: E501 publisher = '\u041E\u0441\u0432\u0456\u0442\u0430.ua' category = 'blog' cover_url = u'http://osvita.ua/doc/i/Contacts-logo.jpg' diff --git a/recipes/osvitaua_ru.recipe b/recipes/osvitaua_ru.recipe index 0407e4dcfb..a98d8cb9e5 100644 --- a/recipes/osvitaua_ru.recipe +++ b/recipes/osvitaua_ru.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Osvita(BasicNewsRecipe): title = '\u041E\u0441\u0432\u0456\u0442\u0430.ua' __author__ = 'bugmen00t' - description = '\u0422\u0435\u043C\u0430\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0439 \u0440\u0435\u0441\u0443\u0440\u0441, \u043F\u043E\u0441\u0432\u044F\u0449\u0451\u043D\u043D\u044B\u0439 \u043E\u0431\u0440\u0430\u0437\u043E\u0432\u0430\u043D\u0438\u044E \u0432 \u0423\u043A\u0440\u0430\u0438\u043D\u0435 \u0438 \u0437\u0430 \u0440\u0443\u0431\u0435\u0436\u043E\u043C.' # noqa + description = '\u0422\u0435\u043C\u0430\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0439 \u0440\u0435\u0441\u0443\u0440\u0441, \u043F\u043E\u0441\u0432\u044F\u0449\u0451\u043D\u043D\u044B\u0439 \u043E\u0431\u0440\u0430\u0437\u043E\u0432\u0430\u043D\u0438\u044E \u0432 \u0423\u043A\u0440\u0430\u0438\u043D\u0435 \u0438 \u0437\u0430 \u0440\u0443\u0431\u0435\u0436\u043E\u043C.' # noqa: E501 publisher = '\u041E\u0441\u0432\u0456\u0442\u0430.ua' category = 'blog' cover_url = u'http://osvita.ua/doc/i/Contacts-logo.jpg' diff --git a/recipes/ottawa_citizen.recipe b/recipes/ottawa_citizen.recipe index 7b0b152767..b3b35e4155 100644 --- a/recipes/ottawa_citizen.recipe +++ b/recipes/ottawa_citizen.recipe @@ -60,36 +60,36 @@ class CanWestPaper(BasicNewsRecipe): ] # un-comment the following six lines for the Vancouver Province - # title = u'Vancouver Province' - # url_prefix = 'http://www.theprovince.com' - # description = u'News from Vancouver, BC' - # std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg' - # logo_url = 'vplogo.jpg' - # fp_tag = 'CAN_TP' + # # title = u'Vancouver Province' + # # url_prefix = 'http://www.theprovince.com' + # # description = u'News from Vancouver, BC' + # # std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg' + # # logo_url = 'vplogo.jpg' + # # fp_tag = 'CAN_TP' # un-comment the following six lines for the Vancouver Sun - # title = u'Vancouver Sun' - # url_prefix = 'http://www.vancouversun.com' - # description = u'News from Vancouver, BC' - # std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg' - # logo_url = 'vslogo.jpg' - # fp_tag = 'CAN_VS' + # # title = u'Vancouver Sun' + # # url_prefix = 'http://www.vancouversun.com' + # # description = u'News from Vancouver, BC' + # # std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg' + # # logo_url = 'vslogo.jpg' + # # fp_tag = 'CAN_VS' # un-comment the following six lines for the Calgary Herald - # title = u'Calgary Herald' - # url_prefix = 'http://www.calgaryherald.com' - # description = u'News from Calgary, AB' - # std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg' - # logo_url = 'chlogo.jpg' - # fp_tag = 'CAN_CH' + # # title = u'Calgary Herald' + # # url_prefix = 'http://www.calgaryherald.com' + # # description = u'News from Calgary, AB' + # # std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg' + # # logo_url = 'chlogo.jpg' + # # fp_tag = 'CAN_CH' # un-comment the following six lines for the Edmonton Journal - # title = u'Edmonton Journal' - # url_prefix = 'http://www.edmontonjournal.com' - # description = u'News from Edmonton, AB' - # std_logo_url 
= 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg' - # logo_url = 'ejlogo.jpg' - # fp_tag = 'CAN_EJ' + # # title = u'Edmonton Journal' + # # url_prefix = 'http://www.edmontonjournal.com' + # # description = u'News from Edmonton, AB' + # # std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg' + # # logo_url = 'ejlogo.jpg' + # # fp_tag = 'CAN_EJ' # un-comment the following six lines for the Ottawa Citizen title = u'Ottawa Citizen' @@ -100,12 +100,12 @@ class CanWestPaper(BasicNewsRecipe): fp_tag = 'CAN_OC' # un-comment the following six lines for the Montreal Gazette -# title = u'Montreal Gazette' -# url_prefix = 'http://www.montrealgazette.com' -# description = u'News from Montreal, QC' -# std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg' -# logo_url = 'mglogo.jpg' -# fp_tag = 'CAN_MG' + # # title = u'Montreal Gazette' + # # url_prefix = 'http://www.montrealgazette.com' + # # description = u'News from Montreal, QC' + # # std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg' + # # logo_url = 'mglogo.jpg' + # # fp_tag = 'CAN_MG' Kindle_Fire = False masthead_url = std_logo_url @@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe): .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } #photocredit { font-size: xx-small; font-weight: normal; }''' - keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})] + keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})] remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'}, dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict( @@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe): name='div', attrs={'class': 'copyright'}), dict(name='div', attrs={'class': 'rule_grey_solid'}), dict(name='div', attrs={'id': 'soundoff'}), - dict(name='div', attrs={'id': re.compile('flyer')}), + dict(name='div', attrs={'id': re.compile(r'flyer')}), dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})] def get_cover_url(self): @@ -154,8 +154,7 @@ class CanWestPaper(BasicNewsRecipe): except: while daysback < 7: cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg' + \ - str((date.today() - timedelta(days=daysback)).day) + \ - '/lg/' + self.fp_tag + '.jpg' + str((date.today() - timedelta(days=daysback)).day) + '/lg/' + self.fp_tag + '.jpg' br = BasicNewsRecipe.get_browser(self) try: br.open(cover) @@ -164,24 +163,24 @@ class CanWestPaper(BasicNewsRecipe): continue break if daysback == 7: - self.log("\nCover unavailable") + self.log('\nCover unavailable') cover = None return cover def fixChars(self, string): # Replace lsquo (\x91) - fixed = re.sub("\x91", "‘", string) + fixed = string.replace('\x91', '‘') # Replace rsquo (\x92) - fixed = re.sub("\x92", "’", fixed) + fixed = fixed.replace('\x92', '’') # Replace ldquo (\x93) - fixed = re.sub("\x93", "“", fixed) + fixed = fixed.replace('\x93', '“') # Replace rdquo (\x94) - fixed = re.sub("\x94", "”", fixed) + fixed = fixed.replace('\x94', '”') # Replace ndash (\x96) - fixed = re.sub("\x96", "–", fixed) + fixed = fixed.replace('\x96', '–') # Replace mdash (\x97) - fixed = re.sub("\x97", "—", fixed) - fixed = re.sub("&#x2019;", "’", fixed) + fixed = fixed.replace('\x97', '—') + fixed = fixed.replace('&#x2019;', '’') return fixed def massageNCXText(self, description): @@ -214,7 +213,7 @@ class CanWestPaper(BasicNewsRecipe):
divtags = soup.findAll('div', attrs={'id': ''}) if divtags: for div in divtags: - del(div['id']) + del div['id'] pgall = soup.find('div', attrs={'id': 'storyphoto'}) if pgall is not None: # photo gallery perhaps @@ -262,10 +261,10 @@ class CanWestPaper(BasicNewsRecipe): if url.startswith('/'): url = self.url_prefix + url if not url.startswith(self.url_prefix): - print("Rejected " + url) + print('Rejected ' + url) return if url in self.url_list: - print("Rejected dup " + url) + print('Rejected dup ' + url) return self.url_list.append(url) title = self.tag_to_string(atag, False) @@ -277,8 +276,8 @@ class CanWestPaper(BasicNewsRecipe): return dtag = adiv.find('div', 'content') description = '' - print("URL " + url) - print("TITLE " + title) + print('URL ' + url) + print('TITLE ' + title) if dtag is not None: stag = dtag.span if stag is not None: @@ -286,18 +285,18 @@ class CanWestPaper(BasicNewsRecipe): description = self.tag_to_string(stag, False) else: description = self.tag_to_string(dtag, False) - print("DESCRIPTION: " + description) + print('DESCRIPTION: ' + description) if key not in articles: articles[key] = [] articles[key].append(dict( title=title, url=url, date='', description=description, author='', content='')) def parse_web_index(key, keyurl): - print("Section: " + key + ': ' + self.url_prefix + keyurl) + print('Section: ' + key + ': ' + self.url_prefix + keyurl) try: soup = self.index_to_soup(self.url_prefix + keyurl) except: - print("Section: " + key + ' NOT FOUND') + print('Section: ' + key + ' NOT FOUND') return ans.append(key) mainsoup = soup.find('div', 'bodywrapper') @@ -309,7 +308,7 @@ class CanWestPaper(BasicNewsRecipe): for wdiv in mainsoup.findAll(attrs={'class': ['headline', 'featurecontent']}): handle_article(wdiv, key) - for (k, url) in self.postmedia_index_pages: + for k,url in self.postmedia_index_pages: parse_web_index(k, url) ans = [(key, articles[key]) for key in ans if key in articles] return ans diff --git a/recipes/ourdailybread.recipe b/recipes/ourdailybread.recipe index 1bbb87f984..a7a933d5b5 100644 --- a/recipes/ourdailybread.recipe +++ b/recipes/ourdailybread.recipe @@ -66,7 +66,7 @@ class OurDailyBread(BasicNewsRecipe): div.set('class', 'calibre-inserted-psalm') hr = div.makeelement('hr') div.insert(0, hr) - # print html.tostring(div) + # print(html.tostring(div)) raw = html.tostring(root, encoding='unicode') return raw diff --git a/recipes/outlook_business_magazine.recipe b/recipes/outlook_business_magazine.recipe index 1ab5abbaa4..50fe0b6ffd 100644 --- a/recipes/outlook_business_magazine.recipe +++ b/recipes/outlook_business_magazine.recipe @@ -48,7 +48,7 @@ class outlook(BasicNewsRecipe): return [('Articles', ans)] def preprocess_raw_html(self, raw, *a): - m = re.search('id="__NEXT_DATA__" type="application/json">', raw) + m = re.search(r'id="__NEXT_DATA__" type="application/json">', raw) raw = raw[m.start():] raw = raw.split('>', 1)[1] data = json.JSONDecoder().raw_decode(raw)[0] diff --git a/recipes/outlook_india.recipe b/recipes/outlook_india.recipe index 71f40a34c8..0f7128ee92 100644 --- a/recipes/outlook_india.recipe +++ b/recipes/outlook_india.recipe @@ -8,7 +8,7 @@ class outlook(BasicNewsRecipe): __author__ = 'unkn0wn' description = ( 'Outlook covers the latest India news, analysis, business news and long-form stories on culture,' - ' money market and personal finance. Read India\'s best online magazine.' + " money market and personal finance. Read India's best online magazine." 
) language = 'en_IN' use_embedded_content = False diff --git a/recipes/pagina12.recipe b/recipes/pagina12.recipe index 54ab8ae358..3276fa4c7f 100644 --- a/recipes/pagina12.recipe +++ b/recipes/pagina12.recipe @@ -33,7 +33,7 @@ class Pagina12(BasicNewsRecipe): articles_are_obfuscated = True temp_files = [] fetch_retries = 10 - extra_css = """ + extra_css = ''' body{font-family: "Open Sans", sans-serif} .article-date{font-size: small; margin-bottom: 1em;} .article-title{font-size: x-large; font-weight: bold; display: block; margin-bottom: 1em; margin-top: 1em;} @@ -43,7 +43,7 @@ class Pagina12(BasicNewsRecipe): img{margin-top:1em; margin-bottom: 1em; display:block} .article-text p:first-letter{display: inline; font-size: xx-large; font-weight: bold} .article-prefix{font-family: "Archivo Narrow",Helvetica,sans-serif; font-size: small; text-transform: uppercase;} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language @@ -76,22 +76,22 @@ class Pagina12(BasicNewsRecipe): return br feeds = [ - (u'Diario de hoy' , u'https://www.pagina12.com.ar/rss/edicion-impresa'), - (u'Espectaculos' , u'https://www.pagina12.com.ar/rss/suplementos/cultura-y-espectaculos/notas'), - (u'Radar' , u'https://www.pagina12.com.ar/rss/suplementos/radar/notas'), - (u'Radar libros' , u'https://www.pagina12.com.ar/rss/suplementos/radar-libros/notas'), - (u'Cash' , u'https://www.pagina12.com.ar/rss/suplementos/cash/notas'), - (u'NO' , u'https://www.pagina12.com.ar/rss/suplementos/no/notas'), - (u'Las 12' , u'https://www.pagina12.com.ar/rss/suplementos/las12/notas'), - (u'Soy' , u'https://www.pagina12.com.ar/rss/suplementos/soy/notas'), - (u'M2' , u'https://www.pagina12.com.ar/rss/suplementos/m2/notas'), - (u'Rosario 12' , u'https://www.pagina12.com.ar/rss/suplementos/rosario12/notas') + (u'Diario de hoy', u'https://www.pagina12.com.ar/rss/edicion-impresa'), + (u'Espectaculos', u'https://www.pagina12.com.ar/rss/suplementos/cultura-y-espectaculos/notas'), + (u'Radar', u'https://www.pagina12.com.ar/rss/suplementos/radar/notas'), + (u'Radar libros', u'https://www.pagina12.com.ar/rss/suplementos/radar-libros/notas'), + (u'Cash', u'https://www.pagina12.com.ar/rss/suplementos/cash/notas'), + (u'NO', u'https://www.pagina12.com.ar/rss/suplementos/no/notas'), + (u'Las 12', u'https://www.pagina12.com.ar/rss/suplementos/las12/notas'), + (u'Soy', u'https://www.pagina12.com.ar/rss/suplementos/soy/notas'), + (u'M2', u'https://www.pagina12.com.ar/rss/suplementos/m2/notas'), + (u'Rosario 12', u'https://www.pagina12.com.ar/rss/suplementos/rosario12/notas') ] def get_cover_url(self): lurl = strftime('https://www.pagina12.com.ar/edicion-impresa/%d-%m-%Y') soup = self.index_to_soup(lurl) - mydiv = soup.find('div', {'class' : lambda x: x and 'printed-edition-cover' in x.split()}) + mydiv = soup.find('div', {'class': lambda x: x and 'printed-edition-cover' in x.split()}) if mydiv: for image in mydiv.findAll('img'): if image['src'].startswith('https://images.pagina12.com.ar/styles/width700/public/'): @@ -112,6 +112,6 @@ class Pagina12(BasicNewsRecipe): self.temp_files.append(tfile) result = tfile.name except: - self.info("Retrying download...") + self.info('Retrying download...') count += 1 return result diff --git a/recipes/pajama.recipe b/recipes/pajama.recipe index 93bd3dab7e..4b556d2db4 100644 --- a/recipes/pajama.recipe +++ b/recipes/pajama.recipe @@ -18,4 +18,3 @@ class PajamasMedia(BasicNewsRecipe): 'http://feeds.feedburner.com/PajamasMedia'), ] - diff --git 
a/recipes/paperpaper.recipe b/recipes/paperpaper.recipe index dbe6fc7c5f..5bb6705753 100644 --- a/recipes/paperpaper.recipe +++ b/recipes/paperpaper.recipe @@ -9,8 +9,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class PaperPaper(BasicNewsRecipe): title = '\u0411\u0443\u043C\u0430\u0433\u0430' __author__ = 'bugmen00t' - description = '\u0418\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u043F\u0440\u0435\u043A\u0440\u0430\u0441\u043D\u043E\u043C \u0433\u043E\u0440\u043E\u0434\u0435 \u0421\u0430\u043D\u043A\u0442-\u041F\u0435\u0442\u0435\u0440\u0431\u0443\u0440\u0433\u0435, \u0432 \u043A\u043E\u0442\u043E\u0440\u043E\u043C, \u043A\u043E\u043D\u0435\u0447\u043D\u043E, \u0434\u0430\u043B\u0435\u043A\u043E \u043D\u0435 \u0432\u0441\u0451 \u0438\u0434\u0435\u0430\u043B\u044C\u043D\u043E, \u2014 \u0438 \u043F\u043E\u044D\u0442\u043E\u043C\u0443 \u043C\u044B \u0437\u0430\u043D\u0438\u043C\u0430\u0435\u043C\u0441\u044F \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u043E\u0439, \u0447\u0442\u043E\u0431\u044B \u043F\u0440\u0438\u0432\u043B\u0435\u043A\u0430\u0442\u044C \u0432\u043D\u0438\u043C\u0430\u043D\u0438\u0435 \u043A \u0432\u0430\u0436\u043D\u044B\u043C \u0434\u043B\u044F \u0432\u0441\u0435\u0445 \u043F\u0440\u043E\u0431\u043B\u0435\u043C\u0430\u043C \u0438 \u0432\u043B\u0438\u044F\u0442\u044C \u043D\u0430 \u0438\u0445 \u0440\u0435\u0448\u0435\u043D\u0438\u0435.' # noqa - publisher = '\u041A\u0438\u0440\u0438\u043B\u043B \u0410\u0440\u0442\u0451\u043C\u0435\u043D\u043A\u043E, \u0422\u0430\u0442\u044C\u044F\u043D\u0430 \u0418\u0432\u0430\u043D\u043E\u0432\u0430' # noqa + description = '\u0418\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u043F\u0440\u0435\u043A\u0440\u0430\u0441\u043D\u043E\u043C \u0433\u043E\u0440\u043E\u0434\u0435 \u0421\u0430\u043D\u043A\u0442-\u041F\u0435\u0442\u0435\u0440\u0431\u0443\u0440\u0433\u0435, \u0432 \u043A\u043E\u0442\u043E\u0440\u043E\u043C, \u043A\u043E\u043D\u0435\u0447\u043D\u043E, \u0434\u0430\u043B\u0435\u043A\u043E \u043D\u0435 \u0432\u0441\u0451 \u0438\u0434\u0435\u0430\u043B\u044C\u043D\u043E, \u2014 \u0438 \u043F\u043E\u044D\u0442\u043E\u043C\u0443 \u043C\u044B \u0437\u0430\u043D\u0438\u043C\u0430\u0435\u043C\u0441\u044F \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u043E\u0439, \u0447\u0442\u043E\u0431\u044B \u043F\u0440\u0438\u0432\u043B\u0435\u043A\u0430\u0442\u044C \u0432\u043D\u0438\u043C\u0430\u043D\u0438\u0435 \u043A \u0432\u0430\u0436\u043D\u044B\u043C \u0434\u043B\u044F \u0432\u0441\u0435\u0445 \u043F\u0440\u043E\u0431\u043B\u0435\u043C\u0430\u043C \u0438 \u0432\u043B\u0438\u044F\u0442\u044C \u043D\u0430 \u0438\u0445 \u0440\u0435\u0448\u0435\u043D\u0438\u0435.' 
# noqa: E501 + publisher = '\u041A\u0438\u0440\u0438\u043B\u043B \u0410\u0440\u0442\u0451\u043C\u0435\u043D\u043A\u043E, \u0422\u0430\u0442\u044C\u044F\u043D\u0430 \u0418\u0432\u0430\u043D\u043E\u0432\u0430' # noqa: E501 category = 'newspaper' cover_url = u'https://upload.wikimedia.org/wikipedia/commons/1/1f/Paperpaper_logo.jpg' language = 'ru' @@ -24,7 +24,7 @@ class PaperPaper(BasicNewsRecipe): remove_tags_after = dict(name='article') - remove_tags = [ + remove_tags = [ dict(name='div', attrs={'class': 'bottom-block '}), dict(name='div', attrs={'class': 'bottom-block news'}) ] diff --git a/recipes/parisreview.recipe b/recipes/parisreview.recipe index e670ff7a53..49050111bf 100644 --- a/recipes/parisreview.recipe +++ b/recipes/parisreview.recipe @@ -21,6 +21,6 @@ class ParisReview(BasicNewsRecipe): # remove_attributes = ['style',] # keep_only_tags = [] remove_tags = [] - # remove_tags_before = dict() - remove_tags_after = dict() + # remove_tags_before = {} + remove_tags_after = {} feeds = [('Posts', 'http://feeds.feedburner.com/TheParisReviewBlog')] diff --git a/recipes/parlamentni_listy.recipe b/recipes/parlamentni_listy.recipe index 27bd17b4e3..adcfc3b259 100644 --- a/recipes/parlamentni_listy.recipe +++ b/recipes/parlamentni_listy.recipe @@ -32,4 +32,4 @@ class plRecipe(BasicNewsRecipe): preprocess_regexps = [(re.compile(r'<(span|strong)[^>]*>\s*Ptejte se politik.*', re.DOTALL | re.IGNORECASE), lambda match: '')] - keep_only_tags = [dict(name='div', attrs={'class': ['article-detail']})] + keep_only_tags = [dict(name='div', attrs={'class': ['article-detail']})] diff --git a/recipes/parool.recipe b/recipes/parool.recipe index fd674d1522..3c2ef05987 100644 --- a/recipes/parool.recipe +++ b/recipes/parool.recipe @@ -26,7 +26,7 @@ class Parool(BasicNewsRecipe): dict(attrs={'data-element-id': ['article-element-authors']}), dict(name=['script', 'noscript', 'style']), ] - remove_attributes = ["class", "id", "name", "style"] + remove_attributes = ['class', 'id', 'name', 'style'] encoding = 'utf-8' no_stylesheets = True ignore_duplicate_articles = {'url'} @@ -50,7 +50,7 @@ class Parool(BasicNewsRecipe): teaser_label = self.tag_to_string(header.find('h4').find('span', attrs={'class': 'teaser__label'})).strip() teaser_sublabel = self.tag_to_string(header.find('h4').find('span', attrs={'class': 'teaser__sublabel'})).strip() teaser_title = self.tag_to_string(header.find('h3').find('span', attrs={'class': 'teaser__title__value--short'})).strip() - ignore = { "dirkjan", "s1ngle", "pukkels", "hein de kort" } + ignore = {'dirkjan', 's1ngle', 'pukkels', 'hein de kort'} if teaser_label.lower() in ignore: continue parts = [] @@ -74,13 +74,13 @@ class Parool(BasicNewsRecipe): if tag['src'][0] == '/': tag['src'] = 'https://www.parool.nl' + tag['src'] for tag in soup(): - if tag.name == "picture": - tag.replaceWith(tag.find("img")) + if tag.name == 'picture': + tag.replaceWith(tag.find('img')) comic_articles = { - "Alle strips van Dirkjan", - "S1NGLE", - "Pukkels", - "Bekijk hier alle cartoons van Hein de Kort", + 'Alle strips van Dirkjan', + 'S1NGLE', + 'Pukkels', + 'Bekijk hier alle cartoons van Hein de Kort', } if self.tag_to_string(soup.find('h1')).strip() in comic_articles: for node in soup.find('figure').find_next_siblings(): @@ -93,8 +93,8 @@ class Parool(BasicNewsRecipe): 'Accept': 'application/json, text/javascript, */*; q=0.01', 'DNT': '1', } - url = "https://login-api.e-pages.dk/v1/krant.parool.nl/folders" + url = 'https://login-api.e-pages.dk/v1/krant.parool.nl/folders' with 
closing(self.browser.open(Request(url, None, headers))) as r: folders = json.loads(r.read()) - return folders["objects"][0]["teaser_medium"] + return folders['objects'][0]['teaser_medium'] return None diff --git a/recipes/pc_advisor.recipe b/recipes/pc_advisor.recipe index e165607646..f2ab13271f 100644 --- a/recipes/pc_advisor.recipe +++ b/recipes/pc_advisor.recipe @@ -14,7 +14,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class pcAdvisor(BasicNewsRecipe): __author__ = 'Lorenzo Vigentini' - description = 'PC Advisor delivers expert advice you can trust to business and home PC users who want to buy the best-value equipment and make the most out of the equipment they already own.' # noqa + description = 'PC Advisor delivers expert advice you can trust to business and home PC users who want to buy the best-value equipment and make the most out of the equipment they already own.' # noqa: E501 cover_url = 'http://media.pcadvisor.co.uk/images/spacer.gif' title = 'Pc Advisor ' diff --git a/recipes/pc_foster.recipe b/recipes/pc_foster.recipe index d73fa0cea4..993a597301 100644 --- a/recipes/pc_foster.recipe +++ b/recipes/pc_foster.recipe @@ -9,7 +9,7 @@ class PC_Foster(BasicNewsRecipe): oldest_article = 7 max_articles_per_feed = 100 __author__ = 'fenuks' - description = u'Vortal technologiczny: testy, recenzje sprzętu komputerowego i telefonów, nowinki hardware, programy i gry dla Windows. Podkręcanie, modding i Overclocking.' # noqa + description = u'Vortal technologiczny: testy, recenzje sprzętu komputerowego i telefonów, nowinki hardware, programy i gry dla Windows. Podkręcanie, modding i Overclocking.' # noqa: E501 category = 'IT' language = 'pl' masthead_url = 'http://pcfoster.pl/public/images/logo.png' diff --git a/recipes/pecat.recipe b/recipes/pecat.recipe index 6426d36221..4d70704a99 100644 --- a/recipes/pecat.recipe +++ b/recipes/pecat.recipe @@ -25,11 +25,11 @@ class Pecat_rs(BasicNewsRecipe): ignore_duplicate_articles = {'url'} needs_subscription = 'optional' publication_type = 'magazine' - extra_css = """ + extra_css = ''' body{font-family: Arial,Helvetica,sans1,sans-serif} img{display: block; margin-bottom: 1em; margin-top: 1em} p{display: block; margin-bottom: 1em; margin-top: 1em} - """ + ''' conversion_options = { 'comment': description, 'tags': 'politika, Srbija', 'publisher': 'Pecat', 'language': language diff --git a/recipes/people_daily.recipe b/recipes/people_daily.recipe index 4ad18a436c..e03f10a52d 100644 --- a/recipes/people_daily.recipe +++ b/recipes/people_daily.recipe @@ -20,7 +20,6 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe): use_embedded_content = False no_stylesheets = True encoding = 'GB2312' - language = 'zh' conversion_options = {'linearize_tables': True} masthead_url = 'http://www.people.com.cn/img/2010wb/images/logo.gif' @@ -89,11 +88,11 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe): # dict(name='p'), # ] remove_tags = [ - dict(name='div', class_="channel cf") + dict(name='div', class_='channel cf') ] - remove_tags_before = [dict(name='div', class_="layout rm_txt cf")] - remove_tags_after = [dict(name='div', class_="edit cf")] + remove_tags_before = [dict(name='div', class_='layout rm_txt cf')] + remove_tags_after = [dict(name='div', class_='edit cf')] def append_page(self, soup, appendtag, position): pager = soup.find('img', attrs={'src': '/img/next_b.gif'}) @@ -129,12 +128,12 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe): year = time.strftime('%Y') month = time.strftime('%m') day = time.strftime('%d') - cover = 
'http://paper.people.com.cn/rmrb/images/' + year + '-' + \ - month + '/' + day + '/01/rmrb' + year + month + day + '01_b.jpg' + cover = ('http://paper.people.com.cn/rmrb/images/' + year + '-' + + month + '/' + day + '/01/rmrb' + year + month + day + '01_b.jpg') br = BasicNewsRecipe.get_browser(self) try: br.open(cover) except: - self.log("\nCover unavailable: " + cover) + self.log('\nCover unavailable: ' + cover) cover = None return cover diff --git a/recipes/pescanik.recipe b/recipes/pescanik.recipe index 985568768b..b5978c82a6 100644 --- a/recipes/pescanik.recipe +++ b/recipes/pescanik.recipe @@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Pescanik(BasicNewsRecipe): title = u'Peščanik' __author__ = 'Darko Miletic' - description = u'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH' # noqa + description = u'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH' # noqa: E501 publisher = u'Peščanik' category = 'news, politics, Serbia' oldest_article = 10 @@ -24,17 +24,17 @@ class Pescanik(BasicNewsRecipe): language = 'sr' publication_type = 'newsportal' masthead_url = 'http://pescanik.net/wp-content/uploads/2011/10/logo1.png' - extra_css = """ + extra_css = ''' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana,Arial,Tahoma,sans1,sans-serif} #BlogTitle{font-size: xx-large; font-weight: bold} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language } - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] # noqa: RUF039 remove_tags = [ dict(name=['object', 'link', 'meta', 'script', 'iframe', 'embed'])] keep_only_tags = [ diff --git a/recipes/phillosophy_now.recipe b/recipes/phillosophy_now.recipe index 21fa9ef565..0ed4f64db7 100644 --- a/recipes/phillosophy_now.recipe +++ b/recipes/phillosophy_now.recipe @@ -82,7 +82,7 @@ class PhilosophyNow(BasicNewsRecipe): if section_title not in feeds: feeds[section_title] = [] feeds[section_title] += articles - ans = [(key, val) for key, val in feeds.items()] + ans = list(feeds.items()) return ans # PN changes the content it delivers based on cookies, so the diff --git a/recipes/pocket_lint.recipe b/recipes/pocket_lint.recipe index c63562d2f3..f50b023b59 100644 --- a/recipes/pocket_lint.recipe +++ b/recipes/pocket_lint.recipe @@ -20,12 +20,12 @@ class AdvancedUserRecipe1718387215(BasicNewsRecipe): auto_cleanup = True feeds = [ - #Segments + # Segments ('Devices', 'https://www.pocket-lint.com/feed/devices-segment/'), ('Entertainment', 'https://www.pocket-lint.com/feed/entertainment-segment/'), ('Lifestyle', 'https://www.pocket-lint.com/feed/lifestyle-segment/'), ('Audio Video', 'https://www.pocket-lint.com/feed/audio-video-segment/'), - #Segments + # Segments ('News', 'https://www.pocket-lint.com/feed/news/'), ('Features', 'https://www.pocket-lint.com/feed/features/'), ('Editorials', 'https://www.pocket-lint.com/feed/editorials/'), @@ -33,13 +33,13 @@ class AdvancedUserRecipe1718387215(BasicNewsRecipe): ('Lists', 
'https://www.pocket-lint.com/feed/lists/'), ('Guides', 'https://www.pocket-lint.com/feed/guides/'), ('Hubs', 'https://www.pocket-lint.com/feed/hubs/'), - #Best Products + # Best Products ('Reviews', 'https://www.pocket-lint.com/feed/reviews/'), ('Buyer’s Guides', 'https://www.pocket-lint.com/feed/buyers-guides/'), ('Comparisons', 'https://www.pocket-lint.com/feed/comparisons/'), ('Gift Guides', 'https://www.pocket-lint.com/feed/gift-guides/'), ('Deals', 'https://www.pocket-lint.com/feed/deals/'), - #Topics + # Topics ('Apps', 'https://www.pocket-lint.com/feed/apps/'), ('Augmented Reality & Virtual Reality', 'https://www.pocket-lint.com/feed/ar-vr/'), ('Cameras', 'https://www.pocket-lint.com/feed/cameras/'), diff --git a/recipes/podnikatel.recipe b/recipes/podnikatel.recipe index 785bbad039..c374860863 100644 --- a/recipes/podnikatel.recipe +++ b/recipes/podnikatel.recipe @@ -19,9 +19,9 @@ class PodnikatelCZ(BasicNewsRecipe): publication_type = 'newsportal' no_stylesheets = True remove_javascript = True - extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \ - p.perex img {display:none;} \ - .urs p {margin: 0 0 0.8em 0;}' + extra_css = '''p.perex{font-size: 1.2em; margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} + p.perex img {display:none;} + .urs p {margin: 0 0 0.8em 0;}''' feeds = [ (u'Aktuality', u'http://rss.podnikatel.cz/aktuality'), diff --git a/recipes/poligon.recipe b/recipes/poligon.recipe index acd9c4ff93..1aab94d713 100644 --- a/recipes/poligon.recipe +++ b/recipes/poligon.recipe @@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Poligon(BasicNewsRecipe): title = '\u041F\u043E\u043B\u0438\u0433\u043E\u043D' __author__ = 'bugmen00t' - description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u043E\u0435 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u0432\u044B\u043F\u0443\u0441\u043A\u0430\u0435\u043C\u043E\u0435 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0430\u043C\u0438 \u043D\u0435\u0441\u043A\u043E\u043B\u044C\u043A\u0438\u0445 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u0445 \u0438\u0437\u0434\u0430\u043D\u0438\u0439, \u043F\u043E\u0434\u0432\u0435\u0440\u0433\u0448\u0438\u0445\u0441\u044F \u0434\u0430\u0432\u043B\u0435\u043D\u0438\u044E \u0441\u043E \u0441\u0442\u043E\u0440\u043E\u043D\u044B \u0433\u043E\u0441\u0443\u0434\u0430\u0440\u0441\u0442\u0432\u0430. \u041F\u0438\u0448\u0435\u043C \u043E \u0420\u043E\u0441\u0441\u0438\u0438 \u0438 \u043D\u0435 \u0442\u043E\u043B\u044C\u043A\u043E.' # noqa + description = '\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u043E\u0435 \u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u0432\u044B\u043F\u0443\u0441\u043A\u0430\u0435\u043C\u043E\u0435 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0430\u043C\u0438 \u043D\u0435\u0441\u043A\u043E\u043B\u044C\u043A\u0438\u0445 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u0445 \u0438\u0437\u0434\u0430\u043D\u0438\u0439, \u043F\u043E\u0434\u0432\u0435\u0440\u0433\u0448\u0438\u0445\u0441\u044F \u0434\u0430\u0432\u043B\u0435\u043D\u0438\u044E \u0441\u043E \u0441\u0442\u043E\u0440\u043E\u043D\u044B \u0433\u043E\u0441\u0443\u0434\u0430\u0440\u0441\u0442\u0432\u0430. \u041F\u0438\u0448\u0435\u043C \u043E \u0420\u043E\u0441\u0441\u0438\u0438 \u0438 \u043D\u0435 \u0442\u043E\u043B\u044C\u043A\u043E.' 
# noqa: E501 publisher = 'poligon.media' category = 'news' cover_url = u'https://www.plgnmedia.io/wp-content/uploads/2022/02/poligon-share.png' diff --git a/recipes/poliitico_eu.recipe b/recipes/poliitico_eu.recipe index 44f4e9f484..666dfcae67 100644 --- a/recipes/poliitico_eu.recipe +++ b/recipes/poliitico_eu.recipe @@ -19,7 +19,7 @@ class Politico(BasicNewsRecipe): ' intelligence about European politics and policy. Download Weekly.') publisher = 'Axel Springer SE.' category = 'news, politics, Europe' - oldest_article = 7 # days + oldest_article = 7 # days max_articles_per_feed = 20 use_embedded_content = False no_stylesheets = True diff --git a/recipes/politiko_dk.recipe b/recipes/politiko_dk.recipe index 5d5228c6c3..82a602eb8e 100644 --- a/recipes/politiko_dk.recipe +++ b/recipes/politiko_dk.recipe @@ -26,10 +26,10 @@ class PolitikoDK(BasicNewsRecipe): auto_cleanup = False keep_only_tags = [ - dict(name="h1", attrs={'class': 'article-headline'}), - dict(name="p", attrs={'class': 'article-summary'}), - dict(name="div", attrs={'class': 'article-date'}), - dict(name="div", attrs={'class': 'article-content'}), + dict(name='h1', attrs={'class': 'article-headline'}), + dict(name='p', attrs={'class': 'article-summary'}), + dict(name='div', attrs={'class': 'article-date'}), + dict(name='div', attrs={'class': 'article-content'}), ] # Feed are found here: http://www.b.dk/rss diff --git a/recipes/polizeipress_de.recipe b/recipes/polizeipress_de.recipe index 86a511331e..5c89167a6e 100644 --- a/recipes/polizeipress_de.recipe +++ b/recipes/polizeipress_de.recipe @@ -5,7 +5,10 @@ class AdvancedUserRecipe(BasicNewsRecipe): title = u'Polizeipresse - Deutschland' __author__ = 'schuster' - description = 'Tagesaktuelle "Polizeiberichte" aus ganz Deutschland (bis auf Ortsebene).' 'Um deinen Ort/Stadt/Kreis usw. einzubinden, gehe auf "http://www.presseportal.de/polizeipresse/" und suche im oberen "Suchfeld" nach dem Namen.' 'Oberhalb der Suchergebnisse (Folgen:) auf den üblichen link zu den RSS-Feeds klicken und den RSS-link im Rezept unter "feeds" eintragen wie üblich.' 'Die Auswahl von Orten kann vereinfacht werden wenn man den Suchbegriff wie folgt eingibt:' '"Stadt-Ort".' # noqa + description = ('Tagesaktuelle "Polizeiberichte" aus ganz Deutschland (bis auf Ortsebene). ' + 'Um deinen Ort/Stadt/Kreis usw. einzubinden, gehe auf "http://www.presseportal.de/polizeipresse/" und suche im oberen "Suchfeld" nach dem Namen. ' + 'Oberhalb der Suchergebnisse (Folgen:) auf den üblichen link zu den RSS-Feeds klicken und den RSS-link im Rezept unter "feeds" eintragen wie üblich. 
' + 'Die Auswahl von Orten kann vereinfacht werden wenn man den Suchbegriff wie folgt eingibt: "Stadt-Ort".') oldest_article = 21 max_articles_per_feed = 100 no_stylesheets = True @@ -22,8 +25,7 @@ class AdvancedUserRecipe(BasicNewsRecipe): def print_version(self, url): segments = url.split('/') - printURL = 'http://www.presseportal.de/print.htx?nr=' + \ - '/'.join(segments[5:6]) + '&type=polizei' + printURL = 'http://www.presseportal.de/print.htx?nr=' + '/'.join(segments[5:6]) + '&type=polizei' return printURL feeds = [(u'Frimmerdorf', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-frimmersdorf&w=public_service'), diff --git a/recipes/polter_pl.recipe b/recipes/polter_pl.recipe index ada19901fd..f5cf9e631e 100644 --- a/recipes/polter_pl.recipe +++ b/recipes/polter_pl.recipe @@ -7,10 +7,10 @@ from calibre.web.feeds.news import BasicNewsRecipe class Polter(BasicNewsRecipe): title = u'Polter.pl' __author__ = 'fenuks' - description = u'Największy polski serwis poświęcony ogólno pojętej fantastyce - grom fabularnym (RPG), książkom, filmowi, komiksowi, grom planszowym, karcianym i bitewnym.' # noqa + description = u'Największy polski serwis poświęcony ogólno pojętej fantastyce - grom fabularnym (RPG), książkom, filmowi, komiksowi, grom planszowym, karcianym i bitewnym.' # noqa: E501 category = 'fantasy, books, rpg, games' language = 'pl' - extra_css = '.image, .floatright {float: right; margin-left: 10px;} .floatleft {float: left; margin-right: 10px;} .calibre_navbar {clear: both;} .p_title {font-weight: bold;} .p_image {margin-left: auto; margin-right: auto; display: block;} .italic {font-style: italic;}' # noqa + extra_css = '.image, .floatright {float: right; margin-left: 10px;} .floatleft {float: left; margin-right: 10px;} .calibre_navbar {clear: both;} .p_title {font-weight: bold;} .p_image {margin-left: auto; margin-right: auto; display: block;} .italic {font-style: italic;}' # noqa: E501 cover_url = 'http://static.polter.pl/sub/promo/bpromo2524.jpg' use_embedded_content = False oldest_article = 7 @@ -41,9 +41,9 @@ class Polter(BasicNewsRecipe): (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html')] def preprocess_html(self, soup): - for s in soup.findAll(attrs={'style': re.compile('float: ?left')}): + for s in soup.findAll(attrs={'style': re.compile(r'float: ?left')}): s['class'] = 'floatleft' - for s in soup.findAll(attrs={'style': re.compile('float: ?right')}): + for s in soup.findAll(attrs={'style': re.compile(r'float: ?right')}): s['class'] = 'floatright' for s in soup.findAll(style=True): if 'bold;' in s['style']: diff --git a/recipes/popscience.recipe b/recipes/popscience.recipe index 002b266e9f..e86741c363 100644 --- a/recipes/popscience.recipe +++ b/recipes/popscience.recipe @@ -26,7 +26,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): classes('Article-header Article-excerpt Article-author Article-thumbnail Article-bodyText article-title article-dek article-paragraph articlebody'), ] remove_tags = [ - dict(name='section', attrs={'class': ['recurrent-share']}) + dict(name='section', attrs={'class': ['recurrent-share']}) ] def parse_section_index(self, slug): diff --git a/recipes/portafolio.recipe b/recipes/portafolio.recipe index ffdc530f85..df17cfb4dc 100644 --- a/recipes/portafolio.recipe +++ b/recipes/portafolio.recipe @@ -20,12 +20,12 @@ class AdvancedUserRecipe1311799898(BasicNewsRecipe): masthead_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png' publication_type = 'newspaper' - extra_css = """ + extra_css = ''' 
p{text-align: justify; font-size: 100%} body{ text-align: left; font-size:100% } h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; } - """ + ''' feeds = [(u'Negocios', u'http://www.portafolio.co/negocios/feed'), (u'Economia', u'http://www.portafolio.co/economia/feed'), diff --git a/recipes/ppe_pl.recipe b/recipes/ppe_pl.recipe index 354b5eec5d..e5e7279ab4 100644 --- a/recipes/ppe_pl.recipe +++ b/recipes/ppe_pl.recipe @@ -11,7 +11,6 @@ class ppeRecipe(BasicNewsRecipe): category = u'News' description = u'Portal o konsolach i grach wideo.' extra_css = '.categories > li {list-style: none; display: inline;} .galmini > li {list-style: none; float: left;} .calibre_navbar {clear: both;}' - remove_empty_feeds = True no_stylesheets = True oldest_article = 7 max_articles_per_feed = 100 diff --git a/recipes/pravda_por.recipe b/recipes/pravda_por.recipe index 274576b4ee..31b88dec59 100644 --- a/recipes/pravda_por.recipe +++ b/recipes/pravda_por.recipe @@ -22,10 +22,10 @@ class Pravda_port(BasicNewsRecipe): remove_empty_feeds = True publication_type = 'newspaper' masthead_url = 'http://port.pravda.ru/pix/logo.gif' - extra_css = """ + extra_css = ''' body{font-family: Arial,sans-serif } img{margin-bottom: 0.4em; display:block} - """ + ''' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language diff --git a/recipes/pravda_uk.recipe b/recipes/pravda_uk.recipe index 32ffbeb26c..360d9dbf31 100644 --- a/recipes/pravda_uk.recipe +++ b/recipes/pravda_uk.recipe @@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class PravdaUAUA(BasicNewsRecipe): title = '\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430 \u043F\u0440\u0430\u0432\u0434\u0430' __author__ = 'bugmen00t' - description = '\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0435 \u0441\u0443\u0441\u043F\u0456\u043B\u044C\u043D\u043E-\u043F\u043E\u043B\u0456\u0442\u0438\u0447\u043D\u0435 \u0456\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0417\u041C\u0406' # noqa + description = '\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0435 \u0441\u0443\u0441\u043F\u0456\u043B\u044C\u043D\u043E-\u043F\u043E\u043B\u0456\u0442\u0438\u0447\u043D\u0435 \u0456\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0417\u041C\u0406' # noqa: E501 publisher = 'pravda.com.ua' category = 'newspaper' cover_url = u'https://img.pravda.com/images/up_for_fb.gif' diff --git a/recipes/pravda_ukraine_ru.recipe b/recipes/pravda_ukraine_ru.recipe index e8dd1a9ed1..6a605cd8dd 100644 --- a/recipes/pravda_ukraine_ru.recipe +++ b/recipes/pravda_ukraine_ru.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class PravdaUARU(BasicNewsRecipe): title = '\u0423\u043A\u0440\u0430\u0438\u043D\u0441\u043A\u0430\u044F \u043F\u0440\u0430\u0432\u0434\u0430' __author__ = 'bugmen00t' - description = '\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u043D\u0430\u044F \u0442\u0435\u043C\u0430\u0442\u0438\u043A\u0430 \u2014 \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0430, \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u043F\u0440\u043E\u0431\u043B\u0435\u043C\u044B, \u044D\u043A\u043E\u043D\u043E\u043C\u0438\u043A\u0430. 
' # noqa + description = '\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043E\u0441\u043D\u043E\u0432\u043D\u0430\u044F \u0442\u0435\u043C\u0430\u0442\u0438\u043A\u0430 \u2014 \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0430, \u0441\u043E\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u043F\u0440\u043E\u0431\u043B\u0435\u043C\u044B, \u044D\u043A\u043E\u043D\u043E\u043C\u0438\u043A\u0430. ' # noqa: E501 publisher = 'pravda.com.ua' category = 'newspaper' cover_url = u'https://img.pravda.com/images/up_for_fb.gif' diff --git a/recipes/presse_portal.recipe b/recipes/presse_portal.recipe index eda9430236..fae834986e 100644 --- a/recipes/presse_portal.recipe +++ b/recipes/presse_portal.recipe @@ -23,8 +23,7 @@ class PressePortalDE(BasicNewsRecipe): description = u'Presseportal News Feed DE' # add date to description so for dayly downloads you can find them easier # ---- can be edit by user - description = description + ' fetched: ' + \ - datetime.now().strftime("%Y-%m-%d") # %H:%M:%S") + description = description + ' fetched: ' + datetime.now().strftime('%Y-%m-%d') # %H:%M:%S") # Who published the content? publisher = u'Presseportal.de' # What is the content of? @@ -142,6 +141,6 @@ class PressePortalDE(BasicNewsRecipe): # and split this at the symbol '.' (if there) : ('3120111',) # from this take the first tuple. '3120111' side = (url.rpartition('/')[-1]).rsplit('.', 1)[0] - # print 'ConvertPrintURL from : ' + url + '/n to ' + side + # print('ConvertPrintURL from : ' + url + '/n to ' + side) printurl = 'http://www.presseportal.de/print/' + side + '-print.html' return printurl diff --git a/recipes/private_eye.recipe b/recipes/private_eye.recipe index 5881eb5b90..f00ffaf4d4 100644 --- a/recipes/private_eye.recipe +++ b/recipes/private_eye.recipe @@ -77,10 +77,10 @@ class PrivateEyeRecipe(BasicNewsRecipe): try: day, month, year = next_issue_text.split(' ') day = ''.join(c for c in day if c.isdigit()) - pub_date = datetime.strptime(" ".join((day, month, year)), "%d %B %Y") - timedelta(12) + pub_date = datetime.strptime(' '.join((day, month, year)), '%d %B %Y') - timedelta(12) self.log('pub-date:', pub_date) - self.conversion_options.update({'pubdate': datetime.strftime(pub_date, "%d %B %Y").lstrip("0")}) - title = self.title + " " + datetime.strftime(pub_date, "%Y-%m-%d") + self.conversion_options.update({'pubdate': datetime.strftime(pub_date, '%d %B %Y').lstrip('0')}) + title = self.title + ' ' + datetime.strftime(pub_date, '%Y-%m-%d') self.conversion_options.update({'title': title}) self.conversion_options.update({'title_sort': title}) except (TypeError, ValueError): @@ -124,24 +124,24 @@ class PrivateEyeRecipe(BasicNewsRecipe): def preprocess_html(self, soup): # Remove tag link to crossword image for tag in soup.findAll('a', {'href': re.compile(r'/pictures/crossword/')}): - self.log("Removing link to crossword image...") + self.log('Removing link to crossword image...') tag.unwrap() # Remove align tag in crossword image (so float right works) for tag in soup.findAll('img', {'src': re.compile(r'/pictures/crossword/')}): - if "align" in tag.attrs: - self.log("Removing crossword image align attribute...") + if 'align' in tag.attrs: + self.log('Removing crossword image align attribute...') del tag.attrs['align'] return soup # We remove vast swathes of HTML which is not part of the articles. 
# Remove sibling content - remove_tags_before = [ - {'name': 'div', 'class': "article"}, - {'name': 'div', 'id': "page"}, - {'name': 'div', 'id': "page-wide"}, - {'name': 'div', 'id': "content"}, + remove_tags_before = [ + {'name': 'div', 'class': 'article'}, + {'name': 'div', 'id': 'page'}, + {'name': 'div', 'id': 'page-wide'}, + {'name': 'div', 'id': 'content'}, {'name': 'a', ' attrs': {'href': 'https://shop.private-eye.co.uk'}}, ] remove_tags_after = remove_tags_before.copy() @@ -161,9 +161,9 @@ class PrivateEyeRecipe(BasicNewsRecipe): {'name': 'div', 'attrs': {'id': 'about-covers'}}, {'name': 'a', ' attrs': {'href': 'https://shop.private-eye.co.uk'}}, {'name': 'iframe'}, - {'name': 'link', 'attrs': {'href': re.compile('/javastyle/lightbox/')}}, - {'name': 'link', 'attrs': {'href': re.compile('/javastyle/news_ticker/')}}, - {'name': 'link', 'attrs': {'href': re.compile('/javastyle/media-queries-')}}, + {'name': 'link', 'attrs': {'href': re.compile(r'/javastyle/lightbox/')}}, + {'name': 'link', 'attrs': {'href': re.compile(r'/javastyle/news_ticker/')}}, + {'name': 'link', 'attrs': {'href': re.compile(r'/javastyle/media-queries-')}}, ] # Convert headers to h1, strapline to h4 diff --git a/recipes/pro_physik.recipe b/recipes/pro_physik.recipe index 8c945069fb..be1e30e2cf 100644 --- a/recipes/pro_physik.recipe +++ b/recipes/pro_physik.recipe @@ -31,7 +31,7 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe): # cover_url = file:///c:/Users/YOUR_USERNAME/AppData/Roaming/calibre/resources/images/news_covers/Pro_Physik.png - extra_css = ''' + extra_css = ''' h1 {font-size: 1.6em; text-align: left} h2, h3 {font-size: 1.3em;text-align: left} h2.subtitle {font-size: 1.2em;text-align: left;font-style: italic} @@ -45,10 +45,10 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe): ] remove_tags = [ - dict(name='ul', attrs={'class':["wj-share-buttons"]}), #Block social media + dict(name='ul', attrs={'class':['wj-share-buttons']}), # Block social media ] feeds = [ (u'Nachrichten', u'https://pro-physik.de/rss/news/'), -# (u'Veranstaltungen', u'https://pro-physik.de/rss/events/'), # AGe 2024-02-11 + # (u'Veranstaltungen', u'https://pro-physik.de/rss/events/'), # AGe 2024-02-11 ] diff --git a/recipes/project.recipe b/recipes/project.recipe index ebb3c672a1..b6ea7dc88c 100644 --- a/recipes/project.recipe +++ b/recipes/project.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Project(BasicNewsRecipe): title = '\u041F\u0440\u043E\u0435\u043A\u0442' __author__ = 'bugmen00t' - description = '\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u0441\u043E\u0437\u0434\u0430\u043D\u043D\u043E\u0435 \u0431\u044B\u0432\u0448\u0438\u043C \u0433\u043B\u0430\u0432\u043D\u044B\u043C \u0440\u0435\u0434\u0430\u043A\u0442\u043E\u0440\u043E\u043C \u0442\u0435\u043B\u0435\u043A\u0430\u043D\u0430\u043B\u0430 \u00AB\u0414\u043E\u0436\u0434\u044C\u00BB \u0438 \u0434\u0440\u0443\u0433\u0438\u0445 \u0421\u041C\u0418 \u0420\u043E\u043C\u0430\u043D\u043E\u043C \u0411\u0430\u0434\u0430\u043D\u0438\u043D\u044B\u043C, \u043A\u043E\u0442\u043E\u0440\u0435 \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u0438\u0437\u0438\u0440\u0443\u044E\u0435\u0442\u0441\u044F \u043D\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u044C\u0441\u043A\u043E\u0439 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0435.' 
# noqa + description = '\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u0441\u043E\u0437\u0434\u0430\u043D\u043D\u043E\u0435 \u0431\u044B\u0432\u0448\u0438\u043C \u0433\u043B\u0430\u0432\u043D\u044B\u043C \u0440\u0435\u0434\u0430\u043A\u0442\u043E\u0440\u043E\u043C \u0442\u0435\u043B\u0435\u043A\u0430\u043D\u0430\u043B\u0430 \u00AB\u0414\u043E\u0436\u0434\u044C\u00BB \u0438 \u0434\u0440\u0443\u0433\u0438\u0445 \u0421\u041C\u0418 \u0420\u043E\u043C\u0430\u043D\u043E\u043C \u0411\u0430\u0434\u0430\u043D\u0438\u043D\u044B\u043C, \u043A\u043E\u0442\u043E\u0440\u0435 \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u0438\u0437\u0438\u0440\u0443\u044E\u0435\u0442\u0441\u044F \u043D\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u044C\u0441\u043A\u043E\u0439 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0435.' # noqa: E501 publisher = '\u0420\u043E\u043C\u0430\u043D \u0411\u0430\u0434\u0430\u043D\u0438\u043D' category = 'blog' cover_url = u'https://proektmedia-stat.ams3.digitaloceanspaces.com/2018/08/proektmedia_facebook_default.png' diff --git a/recipes/project_syndicate.recipe b/recipes/project_syndicate.recipe index 0bd7caac26..9305be77fb 100644 --- a/recipes/project_syndicate.recipe +++ b/recipes/project_syndicate.recipe @@ -24,7 +24,7 @@ class projectsynd(BasicNewsRecipe): resolve_internal_links = True remove_empty_feeds = True remove_attributes = ['style', 'height', 'width'] - oldest_article = 7 # days + oldest_article = 7 # days storage = [] articles_are_obfuscated = True @@ -60,7 +60,7 @@ class projectsynd(BasicNewsRecipe): keep_only_tags = [ dict(attrs={'itemprop':lambda x: x and 'associatedMedia' in x.split()}), dict(attrs={'itemprop':['headline', 'datePublished', 'author', 'abstract', 'articleBody']}), - dict(name = 'aside', attrs={'data-page-area':'article-bottom'}) + dict(name='aside', attrs={'data-page-area':'article-bottom'}) ] remove_tags = [ diff --git a/recipes/prosleduet.recipe b/recipes/prosleduet.recipe index e7ed9666b8..3900da0be7 100644 --- a/recipes/prosleduet.recipe +++ b/recipes/prosleduet.recipe @@ -26,12 +26,12 @@ class ProSleduet(BasicNewsRecipe): remove_tags_after = dict(name='div', attrs={'class': 'container'}) - remove_tags = [ + remove_tags = [ dict(name='div', attrs={'class': 'ya-share2 ya-share2_inited'}) ] feeds = [ -# ('\u041F\u0440\u043E\u0434\u043E\u043B\u0436\u0435\u043D\u0438\u0435 \u0441\u043B\u0435\u0434\u0443\u0435\u0442', 'https://prosleduet.media/feed/'), + # ('\u041F\u0440\u043E\u0434\u043E\u043B\u0436\u0435\u043D\u0438\u0435 \u0441\u043B\u0435\u0434\u0443\u0435\u0442', 'https://prosleduet.media/feed/'), ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://prosleduet.media/category/news/feed/'), ('\u041B\u044E\u0434\u0438', 'https://prosleduet.media/category/people/feed/'), ('\u0421\u044E\u0436\u0435\u0442\u044B', 'https://prosleduet.media/category/syuzhety/feed/'), diff --git a/recipes/prospectmaguk_free.recipe b/recipes/prospectmaguk_free.recipe index c37513a90d..b0018324df 100644 --- a/recipes/prospectmaguk_free.recipe +++ b/recipes/prospectmaguk_free.recipe @@ -8,47 +8,47 @@ from urllib.parse import urljoin from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes -_issue_url = "" +_issue_url = '' class ProspectMagazineUKFree(BasicNewsRecipe): - title = "Prospect Magazine (Free)" - __author__ = "ping" + title = 'Prospect Magazine (Free)' + __author__ = 'ping' description = ( - "Prospect is Britain’s leading current affairs monthly 
magazine. " - "It is an independent and eclectic forum for writing and thinking—in " - "print and online. Published every month with two double issues in " - "the summer and winter, it spans politics, science, foreign affairs, " - "economics, the environment, philosophy and the arts." + 'Prospect is Britain’s leading current affairs monthly magazine. ' + 'It is an independent and eclectic forum for writing and thinking—in ' + 'print and online. Published every month with two double issues in ' + 'the summer and winter, it spans politics, science, foreign affairs, ' + 'economics, the environment, philosophy and the arts.' ) - language = "en_GB" - category = "news, UK" - publication_type = "magazine" - masthead_url = "https://media.prospectmagazine.co.uk/prod/images/gm_grid_thumbnail/358ffc17208c-f4c3cddcdeda-prospect-masthead.png" - encoding = "utf-8" + language = 'en_GB' + category = 'news, UK' + publication_type = 'magazine' + masthead_url = 'https://media.prospectmagazine.co.uk/prod/images/gm_grid_thumbnail/358ffc17208c-f4c3cddcdeda-prospect-masthead.png' + encoding = 'utf-8' remove_javascript = True no_stylesheets = True - ignore_duplicate_articles = {"url"} - INDEX = "https://www.prospectmagazine.co.uk/issues" + ignore_duplicate_articles = {'url'} + INDEX = 'https://www.prospectmagazine.co.uk/issues' - keep_only_tags = [dict(class_="prop-book-article-panel_main")] + keep_only_tags = [dict(class_='prop-book-article-panel_main')] remove_tags = [ dict( class_=[ - "prop-book-review-header-wrapper_magazine", - "prop-mobile-social-share_header", - "prop-magazine-link-block", - "pros-article-body__img-credit", - "pros-article-topics__wrapper", - "pros-article-author__image-wrapper", - "prop-book-review-promo_details-buy-mobile", + 'prop-book-review-header-wrapper_magazine', + 'prop-mobile-social-share_header', + 'prop-magazine-link-block', + 'pros-article-body__img-credit', + 'pros-article-topics__wrapper', + 'pros-article-author__image-wrapper', + 'prop-book-review-promo_details-buy-mobile', ] ), - dict(id=["disqus_thread", "newsletter_wrapper"]), - prefixed_classes("dfp-slot-"), + dict(id=['disqus_thread', 'newsletter_wrapper']), + prefixed_classes('dfp-slot-'), ] - extra_css = """ + extra_css = ''' h1 { font-size: 1.8rem; margin-bottom: 0.4rem; } .prop-book-review-header-wrapper_standfirst { font-size: 1.2rem; font-style: italic; font-weight: normal; margin-bottom: 0.5rem; } .prop-book-review-header-wrapper_details { margin-top: 1rem; margin-bottom: 1rem; } @@ -62,23 +62,23 @@ class ProspectMagazineUKFree(BasicNewsRecipe): .pullquote, blockquote { text-align: center; margin-left: 0; margin-bottom: 0.4rem; font-size: 1.25rem; } .prop-book-review-article_author { margin: 1.5rem 0; font-style: italic; } .prop-book-review-promo { margin-bottom: 1rem; } - """ + ''' def preprocess_html(self, soup): # re-position lede image - lede_img = soup.find("img", class_="prop-book-review-header-wrapper_image") - meta = soup.find("div", class_="prop-book-review-header-wrapper_details") + lede_img = soup.find('img', class_='prop-book-review-header-wrapper_image') + meta = soup.find('div', class_='prop-book-review-header-wrapper_details') if lede_img and meta: lede_img = lede_img.extract() meta.insert_after(lede_img) - for img in soup.find_all("img", attrs={"data-src": True}): - img["src"] = img["data-src"] - del img["data-src"] + for img in soup.find_all('img', attrs={'data-src': True}): + img['src'] = img['data-src'] + del img['data-src'] - for byline_link in soup.find_all("a", attrs={"data-author-name": 
True}): + for byline_link in soup.find_all('a', attrs={'data-author-name': True}): byline_link.unwrap() - for author_link in soup.find_all("a", class_="pros-article-author"): + for author_link in soup.find_all('a', class_='pros-article-author'): author_link.unwrap() return soup @@ -87,39 +87,39 @@ class ProspectMagazineUKFree(BasicNewsRecipe): if not _issue_url: issues_soup = self.index_to_soup(self.INDEX) curr_issue_a_ele = issues_soup.find( - "a", class_="pros-collection-landing__item" + 'a', class_='pros-collection-landing__item' ) - curr_issue_url = urljoin(self.INDEX, curr_issue_a_ele["href"]) + curr_issue_url = urljoin(self.INDEX, curr_issue_a_ele['href']) else: curr_issue_url = _issue_url soup = self.index_to_soup(curr_issue_url) issue_name = ( - self.tag_to_string(soup.find(class_="magazine-lhc__issue-name")) - .replace(" issue", "") + self.tag_to_string(soup.find(class_='magazine-lhc__issue-name')) + .replace(' issue', '') .strip() ) - self.timefmt = f" [{issue_name}]" + self.timefmt = f' [{issue_name}]' - self.cover_url = soup.find("img", class_="magazine-lhc__cover-image")[ - "data-src" - ].replace("portrait_small_fit", "portrait_large_fit") + self.cover_url = soup.find('img', class_='magazine-lhc__cover-image')[ + 'data-src' + ].replace('portrait_small_fit', 'portrait_large_fit') articles = OrderedDict() - sections = soup.find_all("div", class_="pro-magazine-section") + sections = soup.find_all('div', class_='pro-magazine-section') for section in sections: section_name = self.tag_to_string( - section.find(class_="pro-magazine-section__name") + section.find(class_='pro-magazine-section__name') ) for sect_article in section.find_all( - class_="pro-magazine-section__article" + class_='pro-magazine-section__article' ): articles.setdefault(section_name, []).append( { - "url": urljoin(self.INDEX, sect_article.find("a")["href"]), - "title": self.tag_to_string( + 'url': urljoin(self.INDEX, sect_article.find('a')['href']), + 'title': self.tag_to_string( sect_article.find( - class_="pro-magazine-section__article-headline" + class_='pro-magazine-section__article-headline' ) ), } diff --git a/recipes/psych.recipe b/recipes/psych.recipe index b5353a4f1e..b07ed764d5 100644 --- a/recipes/psych.recipe +++ b/recipes/psych.recipe @@ -5,10 +5,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe def absurl(url): - if url.startswith("//"): - return "https:" + url - if url.startswith("/"): - return "https://www.psychologytoday.com" + url + if url.startswith('//'): + return 'https:' + url + if url.startswith('/'): + return 'https://www.psychologytoday.com' + url return url diff --git a/recipes/quanta_magazine.recipe b/recipes/quanta_magazine.recipe index 404b7d5a50..aa9d7567fd 100644 --- a/recipes/quanta_magazine.recipe +++ b/recipes/quanta_magazine.recipe @@ -7,25 +7,25 @@ from calibre.web.feeds.news import BasicNewsRecipe class Quanta(BasicNewsRecipe): - title = "Quanta Magazine" + title = 'Quanta Magazine' __author__ = 'lui1' - description = "Articles from the magazine. Please set to download weekly." + description = 'Articles from the magazine. Please set to download weekly.' 
oldest_article = 7 max_articles_per_feed = 100 language = 'en' encoding = 'UTF-8' - publication_type = "blog" - cover_url = "https://d2r55xnwy6nx47.cloudfront.net/uploads/2017/05/logo.png" + publication_type = 'blog' + cover_url = 'https://d2r55xnwy6nx47.cloudfront.net/uploads/2017/05/logo.png' feeds = [ ('Articles', 'https://api.quantamagazine.org/feed/'), ] keep_only_tags = [ - dict(name="div", attrs={"id": "postBody"}), + dict(name='div', attrs={'id': 'postBody'}), ] remove_tags = [ - dict(name="div", attrs={"class": "post__sidebar__content"}), + dict(name='div', attrs={'class': 'post__sidebar__content'}), ] diff --git a/recipes/queueacmorg.recipe b/recipes/queueacmorg.recipe index e58878f435..deb7053020 100644 --- a/recipes/queueacmorg.recipe +++ b/recipes/queueacmorg.recipe @@ -11,17 +11,17 @@ ACM Queue Magazine class QueueAcmOrg(BasicNewsRecipe): - title = "ACM Queue Magazine" + title = 'ACM Queue Magazine' __author__ = 'yodha8' - description = "Queue is the ACM magazine for practicing software engineers. Published once every 2 months. Example: Jan-Feb." + description = 'Queue is the ACM magazine for practicing software engineers. Published once every 2 months. Example: Jan-Feb.' oldest_article = 60 max_articles_per_feed = 50 auto_cleanup = True language = 'en' - cover_url = "https://queue.acm.org/img/acmqueue_logo.gif" + cover_url = 'https://queue.acm.org/img/acmqueue_logo.gif' feeds = [ - ("All Queue Content", "https://queue.acm.org/rss/feeds/queuecontent.xml"), + ('All Queue Content', 'https://queue.acm.org/rss/feeds/queuecontent.xml'), ] def get_cover_url(self): diff --git a/recipes/radio_canada.recipe b/recipes/radio_canada.recipe index 765c40920b..1316a844f0 100644 --- a/recipes/radio_canada.recipe +++ b/recipes/radio_canada.recipe @@ -24,6 +24,7 @@ def classes(classes): return dict(attrs={ 'class': lambda x: x and frozenset(x.split()).intersection(q)}) + class RadioCanada(BasicNewsRecipe): title = 'Radio Canada' __author__ = 'quatorze, pticrix' @@ -47,28 +48,28 @@ class RadioCanada(BasicNewsRecipe): ''' keep_only_tags = [ - classes('text-fluid1' # title - ' group/signature' # author (top) - ' sc-jbo7hw-6 cemuXe sc-1ejcmnj-0 bKbDpQ' # whole article block - ' sc-jbo7hw-4 gPWroG' # individual paragraphs - ' sc-jbo7hw-3 GgmiC' # section title (h2 headings) - ' sc-1tkrlyq-0 sc-1tkrlyq-1 eJZZNJ dthPak sc-1fmq1ly-0 hGSCGE' # images - ' sc-1tkrlyq-2 gucMx transition-opacity ease-out' # images - ' sc-ic6be9-0 eZGuin' # figure/picture caption + classes('text-fluid1' # title + ' group/signature' # author (top) + ' sc-jbo7hw-6 cemuXe sc-1ejcmnj-0 bKbDpQ' # whole article block + ' sc-jbo7hw-4 gPWroG' # individual paragraphs + ' sc-jbo7hw-3 GgmiC' # section title (h2 headings) + ' sc-1tkrlyq-0 sc-1tkrlyq-1 eJZZNJ dthPak sc-1fmq1ly-0 hGSCGE' # images + ' sc-1tkrlyq-2 gucMx transition-opacity ease-out' # images + ' sc-ic6be9-0 eZGuin' # figure/picture caption ' blockquote' ), ] remove_tags = [ - classes('print:hidden' # whatever is deemed not necessary while printing - ' xsOnly:hidden' # whetever is deemed not necessary on very small screens - ' sc-jbo7hw-0 dQmOIK' # ads - ' sc-1f1cagl-0 hvyKh' # buttons? (full-screen, contact author) - ' sc-jbo7hw-2' # link to full dossier and insights - ' sc-fqkvVR crilYZ rcplayer-show' # video player - ' framed' # "À lire aussi" (might be desirable in some cases?) 
- ' bg-gray100 light:bg-gray100 dark:bg-gray999 lg:mt-10 print:hidden' # "À la une" (links to other current events) and more - ' sc-pahfbg-0 beUHeC' # Infolettre + classes('print:hidden' # whatever is deemed not necessary while printing + ' xsOnly:hidden' # whetever is deemed not necessary on very small screens + ' sc-jbo7hw-0 dQmOIK' # ads + ' sc-1f1cagl-0 hvyKh' # buttons? (full-screen, contact author) + ' sc-jbo7hw-2' # link to full dossier and insights + ' sc-fqkvVR crilYZ rcplayer-show' # video player + ' framed' # "À lire aussi" (might be desirable in some cases?) + ' bg-gray100 light:bg-gray100 dark:bg-gray999 lg:mt-10 print:hidden' # "À la une" (links to other current events) and more + ' sc-pahfbg-0 beUHeC' # Infolettre ), dict(name='aside') ] @@ -103,7 +104,7 @@ class RadioCanada(BasicNewsRecipe): ('Grands titres', 'https://ici.radio-canada.ca/rss/771'), ('Football', 'https://ici.radio-canada.ca/rss/1000057'), ('Hockey', 'https://ici.radio-canada.ca/rss/1000056'), - ('Olympiques', 'https://ici.radio-canada.ca/rss/64852'), + ('Olympiques', 'https://ici.radio-canada.ca/rss/64852'), ('Podium', 'https://ici.radio-canada.ca/rss/555082'), ('Soccer', 'https://ici.radio-canada.ca/rss/1000058'), ('Tennis', 'https://ici.radio-canada.ca/rss/1000059'), @@ -151,16 +152,16 @@ class RadioCanada(BasicNewsRecipe): # The following was copied and adapted as per the following post: # https://www.mobileread.com/forums/showpost.php?p=1165462&postcount=6 # Credit goes to user Starson17 - def parse_feeds (self): - feeds = BasicNewsRecipe.parse_feeds(self) - for feed in feeds: - for article in feed.articles[:]: - if ('VIDEO' in article.title.upper() or - 'OHDIO' in article.title.upper() or - '/emissions/' in article.url or - '/segments/' in article.url or - '/entrevue/' in article.url or - '/ohdio/' in article.url - ): - feed.articles.remove(article) - return feeds + def parse_feeds(self): + feeds = BasicNewsRecipe.parse_feeds(self) + for feed in feeds: + for article in feed.articles[:]: + if ('VIDEO' in article.title.upper() or + 'OHDIO' in article.title.upper() or + '/emissions/' in article.url or + '/segments/' in article.url or + '/entrevue/' in article.url or + '/ohdio/' in article.url + ): + feed.articles.remove(article) + return feeds diff --git a/recipes/radio_prague.recipe b/recipes/radio_prague.recipe index 55f4e8d15f..4aca975b09 100644 --- a/recipes/radio_prague.recipe +++ b/recipes/radio_prague.recipe @@ -18,7 +18,7 @@ class AdvancedUserRecipe1291540961(BasicNewsRecipe): language = 'en_CZ' publication_type = 'newsportal' - extra_css = 'h1 .section { display: block; text-transform: uppercase; font-size: 10px; margin-top: 4em; } \n .title { font-size: 14px; margin-top: 4em; } \n a.photo { display: block; clear:both; } \n .caption { font-size: 9px; display: block; clear:both; padding:0px 0px 20px 0px; } \n a { font-type: normal; }' # noqa + extra_css = 'h1 .section { display: block; text-transform: uppercase; font-size: 10px; margin-top: 4em; } \n .title { font-size: 14px; margin-top: 4em; } \n a.photo { display: block; clear:both; } \n .caption { font-size: 9px; display: block; clear:both; padding:0px 0px 20px 0px; } \n a { font-type: normal; }' # noqa: E501 keep_only_tags = [ dict(name='div', attrs={'class': ['main']}) diff --git a/recipes/radiosvoboda_ua.recipe b/recipes/radiosvoboda_ua.recipe index 6aa730dee9..3520accab0 100644 --- a/recipes/radiosvoboda_ua.recipe +++ b/recipes/radiosvoboda_ua.recipe @@ -7,8 +7,8 @@ from calibre.web.feeds.news import BasicNewsRecipe class 
RadioSvoboda(BasicNewsRecipe): title = '\u0420\u0430\u0434\u0456\u043E \u0421\u0432\u043E\u0431\u043E\u0434\u0430' __author__ = 'bugmen00t' - description = '\u0420\u0430\u0434\u0456\u043E\u0441\u0442\u0430\u043D\u0446\u0456\u044F \u0442\u0430 \u0456\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0432\u0438\u0434\u0430\u043D\u043D\u044F, \u044F\u043A\u0430 \u043F\u043E\u0437\u0438\u0446\u0456\u043E\u043D\u0443\u0454 \u0441\u0435\u0431\u0435 \u044F\u043A \u043F\u0440\u0438\u0432\u0430\u0442\u043D\u0438\u0439 \u043D\u0435\u043A\u043E\u043C\u0435\u0440\u0446\u0456\u0439\u043D\u0438\u0439 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u043E-\u043D\u043E\u0432\u0438\u043D\u043D\u0438\u0439 \u0437\u0430\u0441\u0456\u0431 \u043C\u0430\u0441\u043E\u0432\u043E\u0457 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0457.' # noqa - publisher = '\u0420\u0430\u0434\u0456\u043E \u0412\u0456\u043B\u044C\u043D\u0430 \u0404\u0432\u0440\u043E\u043F\u0430/\u0420\u0430\u0434\u0456\u043E \u0421\u0432\u043E\u0431\u043E\u0434\u0430 (\u0420\u0412\u0404/\u0420\u0421)' # noqa + description = '\u0420\u0430\u0434\u0456\u043E\u0441\u0442\u0430\u043D\u0446\u0456\u044F \u0442\u0430 \u0456\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0432\u0438\u0434\u0430\u043D\u043D\u044F, \u044F\u043A\u0430 \u043F\u043E\u0437\u0438\u0446\u0456\u043E\u043D\u0443\u0454 \u0441\u0435\u0431\u0435 \u044F\u043A \u043F\u0440\u0438\u0432\u0430\u0442\u043D\u0438\u0439 \u043D\u0435\u043A\u043E\u043C\u0435\u0440\u0446\u0456\u0439\u043D\u0438\u0439 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u043E-\u043D\u043E\u0432\u0438\u043D\u043D\u0438\u0439 \u0437\u0430\u0441\u0456\u0431 \u043C\u0430\u0441\u043E\u0432\u043E\u0457 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0457.' # noqa: E501 + publisher = '\u0420\u0430\u0434\u0456\u043E \u0412\u0456\u043B\u044C\u043D\u0430 \u0404\u0432\u0440\u043E\u043F\u0430/\u0420\u0430\u0434\u0456\u043E \u0421\u0432\u043E\u0431\u043E\u0434\u0430 (\u0420\u0412\u0404/\u0420\u0421)' # noqa: E501 category = 'newspaper' cover_url = u'https://www.radiosvoboda.org/Content/responsive/RFE/uk-UA/img/top_logo_news.png' language = 'uk' diff --git a/recipes/rbc_ru.recipe b/recipes/rbc_ru.recipe index 271b842060..8f9a02e7ce 100644 --- a/recipes/rbc_ru.recipe +++ b/recipes/rbc_ru.recipe @@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class RBC_ru(BasicNewsRecipe): title = u'RBC.ru' __author__ = 'A. 
Chewi (with fixes by bugmen00t)' - description = 'Российское информационное агентство «РосБизнесКонсалтинг» (РБК) - ленты новостей политики, экономики и финансов, аналитические материалы, комментарии и прогнозы, тематические статьи' # noqa + description = 'Российское информационное агентство «РосБизнесКонсалтинг» (РБК) - ленты новостей политики, экономики и финансов, аналитические материалы, комментарии и прогнозы, тематические статьи' # noqa: E501 needs_subscription = False cover_url = 'https://pics.rbc.ru/img/fp_v4/skin/img/logo.gif' cover_margins = (80, 160, '#ffffff') diff --git a/recipes/rbcua_ru.recipe b/recipes/rbcua_ru.recipe index df124ce296..c2b70ccf96 100644 --- a/recipes/rbcua_ru.recipe +++ b/recipes/rbcua_ru.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class RBCUA(BasicNewsRecipe): title = '\u0420\u0411\u041A-\u0423\u043A\u0440\u0430\u0438\u043D\u0430' __author__ = 'bugmen00t' - description = '\u0423\u043A\u0440\u0430\u0438\u043D\u0441\u043A\u043E\u0435 \u0438\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u043E\u043D\u043D\u043E\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E, \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u0438\u0437\u0438\u0440\u0443\u044E\u0449\u0435\u0435\u0441\u044F \u043D\u0430 \u0444\u0438\u043D\u0430\u043D\u0441\u043E\u0432\u044B\u0445, \u044D\u043A\u043E\u043D\u043E\u043C\u0438\u0447\u0435\u0441\u043A\u0438\u0445 \u0438 \u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0445 \u043D\u043E\u0432\u043E\u0441\u0442\u044F\u0445 \u0423\u043A\u0440\u0430\u0438\u043D\u044B.' # noqa + description = '\u0423\u043A\u0440\u0430\u0438\u043D\u0441\u043A\u043E\u0435 \u0438\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u043E\u043D\u043D\u043E\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E, \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u0438\u0437\u0438\u0440\u0443\u044E\u0449\u0435\u0435\u0441\u044F \u043D\u0430 \u0444\u0438\u043D\u0430\u043D\u0441\u043E\u0432\u044B\u0445, \u044D\u043A\u043E\u043D\u043E\u043C\u0438\u0447\u0435\u0441\u043A\u0438\u0445 \u0438 \u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0445 \u043D\u043E\u0432\u043E\u0441\u0442\u044F\u0445 \u0423\u043A\u0440\u0430\u0438\u043D\u044B.' # noqa: E501 publisher = '\u041E\u041E\u041E \u00AB\u042E\u0431\u0438\u0442\u0438 \u041C\u0435\u0434\u0438\u0430\u00BB' category = 'news' cover_url = u'https://www.rbc.ua/static/common/imgs/logo650.jpg' diff --git a/recipes/rbcua_ua.recipe b/recipes/rbcua_ua.recipe index 49779b7395..aa5b1628b9 100644 --- a/recipes/rbcua_ua.recipe +++ b/recipes/rbcua_ua.recipe @@ -7,7 +7,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class RBCUA(BasicNewsRecipe): title = '\u0420\u0411\u041A-\u0423\u043A\u0440\u0430\u0457\u043D\u0430' __author__ = 'bugmen00t' - description = '\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0435 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E.' # noqa + description = '\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0435 \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E.' 
# noqa: E501 publisher = '\u0422\u041E\u0412 \u00AB\u042E\u0431\u0456\u0442\u0456 \u041C\u0435\u0434\u0456\u0430\u00BB' category = 'news' cover_url = u'https://www.rbc.ua/static/img/r/b/rbc_pic_best_news_650x410_2_650x410_1_650x410_1_480x270.jpg' diff --git a/recipes/readers_digest.recipe b/recipes/readers_digest.recipe index 79cd854ea8..9c373528c6 100644 --- a/recipes/readers_digest.recipe +++ b/recipes/readers_digest.recipe @@ -23,7 +23,6 @@ class ReadersDigest(BasicNewsRecipe): oldest_article = 60 max_articles_per_feed = 200 - language = 'en' remove_javascript = True remove_attributes = ['style'] diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 43c8c8211c..1c99c8b367 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -1,6 +1,6 @@ -""" +''' Pocket Calibre Recipe v1.5 -""" +''' import json import operator @@ -53,15 +53,15 @@ class Pocket(BasicNewsRecipe): articles = [] def get_browser(self, *args, **kwargs): - """ + ''' We need to pretend to be a recent version of safari for the mac to prevent User-Agent checks Pocket api requires username and password so fail loudly if it's missing from the config. - """ + ''' br = BasicNewsRecipe.get_browser(self, - user_agent='Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; \ - en-us) AppleWebKit/533.19.4 (KHTML, like Gecko) \ - Version/5.0.3 Safari/533.19.4') + user_agent='''Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-us) + AppleWebKit/533.19.4 (KHTML, like Gecko) + Version/5.0.3 Safari/533.19.4''') if self.username is not None and self.password is not None: br.open(self.legacy_login_url) br.select_form(nr=0) @@ -69,33 +69,33 @@ class Pocket(BasicNewsRecipe): br['password'] = self.password br.submit() else: - self.user_error("This Recipe requires authentication") + self.user_error('This Recipe requires authentication') return br def get_auth_uri(self): - """Quick function to return the authentication part of the url""" - uri = "" - uri = u'{0}&apikey={1!s}'.format(uri, self.apikey) + '''Quick function to return the authentication part of the url''' + uri = '' + uri = u'{0}&apikey={1}'.format(uri, self.apikey) if self.username is None or self.password is None: - self.user_error("Username or password is blank.") + self.user_error('Username or password is blank.') else: - uri = u'{0}&username={1!s}'.format(uri, self.username) - uri = u'{0}&password={1!s}'.format(uri, self.password) + uri = u'{0}&username={1}'.format(uri, self.username) + uri = u'{0}&password={1}'.format(uri, self.password) return uri def get_pull_articles_uri(self): - uri = "" + uri = '' uri = u'{0}&state={1}'.format(uri, u'unread') uri = u'{0}&contentType={1}'.format(uri, u'article') uri = u'{0}&sort={1}'.format(uri, self.sort_method) - uri = u'{0}&count={1!s}'.format(uri, self.max_articles_per_feed) + uri = u'{0}&count={1}'.format(uri, self.max_articles_per_feed) if self.only_pull_tag is not None: uri = u'{0}&tag={1}'.format(uri, self.only_pull_tag) return uri def parse_index(self): pocket_feed = [] - fetch_url = u"{0}?{1}{2}".format( + fetch_url = u'{0}?{1}{2}'.format( self.read_api_url, self.get_auth_uri(), self.get_pull_articles_uri() @@ -106,7 +106,7 @@ class Pocket(BasicNewsRecipe): if len(pocket_feed) < self.minimum_articles: self.mark_as_read_after_dl = False self.user_error( - "Only {0} articles retrieved, minimum_articles not reached".format(len(pocket_feed))) + 'Only {0} articles retrieved, minimum_articles not reached'.format(len(pocket_feed))) for pocket_article in pocket_feed.items(): self.articles.append({ @@ 
-119,7 +119,7 @@ class Pocket(BasicNewsRecipe): 'sort': pocket_article[1]['sort_id'] }) self.articles = sorted(self.articles, key=operator.itemgetter('sort')) - return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)] + return [('My Pocket Articles for {0}'.format(strftime('[%I:%M %p]')), self.articles)] def mark_as_read(self, mark_list): actions_list = [] @@ -147,18 +147,15 @@ class Pocket(BasicNewsRecipe): def cleanup(self): if self.mark_as_read_after_dl: self.mark_as_read([x['item_id'] for x in self.articles]) - else: - pass def default_cover(self, cover_file): - """ + ''' Create a generic cover for recipes that don't have a cover This override adds time to the cover - """ + ''' try: from calibre.ebooks.covers import calibre_cover2 - title = self.title if isinstance(self.title, type(u'')) else \ - self.title.decode('utf-8', 'replace') + title = self.title if isinstance(self.title, type(u'')) else self.title.decode('utf-8', 'replace') date = strftime(self.timefmt) time = strftime('[%I:%M %p]') img_data = calibre_cover2(title, date, time) diff --git a/recipes/real_clear.recipe b/recipes/real_clear.recipe index a2031e8b7b..13a0650e5e 100644 --- a/recipes/real_clear.recipe +++ b/recipes/real_clear.recipe @@ -35,17 +35,17 @@ class RealClear(BasicNewsRecipe): # Numeric parameter is type, controls whether we look for feedsets = [ - ["Politics", "http://www.realclearpolitics.com/index.xml", 0], - ["Policy", "http://www.realclearpolicy.com/index.xml", 0], - ["Science", "http://www.realclearscience.com/index.xml", 0], - ["Tech", "http://www.realcleartechnology.com/index.xml", 0], + ['Politics', 'http://www.realclearpolitics.com/index.xml', 0], + ['Policy', 'http://www.realclearpolicy.com/index.xml', 0], + ['Science', 'http://www.realclearscience.com/index.xml', 0], + ['Tech', 'http://www.realcleartechnology.com/index.xml', 0], # The feedburner is essentially the same as the top feed, politics. # ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1], # ["Commentary", "http://feeds.feedburner.com/Realclearpolitics-Articles", 1], - ["Markets Home", "http://www.realclearmarkets.com/index.xml", 0], - ["Markets", "http://www.realclearmarkets.com/articles/index.xml", 0], - ["World", "http://www.realclearworld.com/index.xml", 0], - ["World Blog", "http://www.realclearworld.com/blog/index.xml", 2] + ['Markets Home', 'http://www.realclearmarkets.com/index.xml', 0], + ['Markets', 'http://www.realclearmarkets.com/articles/index.xml', 0], + ['World', 'http://www.realclearworld.com/index.xml', 0], + ['World Blog', 'http://www.realclearworld.com/blog/index.xml', 2] ] # Hints to extractPrintURL. # First column is the URL snippet. Then the string to search for as text, @@ -53,13 +53,12 @@ class RealClear(BasicNewsRecipe): # drill down. 
phUrlSnip, phLinkText, phMainSearch, phHrefSearch = range(4) - printhints = [["realclear", "", '', 'printpage'], - ["billoreilly.com", "Print this entry", 'a', ''], - ["billoreilly.com", "Print This Article", 'a', ''], - ["politico.com", "Print", - 'a', 'share-print'], - ["nationalreview.com", ">Print<", 'a', ''], - ["reason.com", "", 'a', 'printer'] + printhints = [['realclear', '', '', 'printpage'], + ['billoreilly.com', 'Print this entry', 'a', ''], + ['billoreilly.com', 'Print This Article', 'a', ''], + ['politico.com', 'Print', 'a', 'share-print'], + ['nationalreview.com', '>Print<', 'a', ''], + ['reason.com', '', 'a', 'printer'] # The following are not supported due to JavaScripting, and would require obfuscated_article to handle # forbes, # usatoday - just prints with all current crap anyhow @@ -82,12 +81,12 @@ class RealClear(BasicNewsRecipe): def extractPrintURL(self, pageURL): tagURL = pageURL baseParse = urlparse(pageURL) - baseURL = baseParse[0] + "://" + baseParse[1] + baseURL = baseParse[0] + '://' + baseParse[1] hintsCount = len(self.printhints) - for x in range(0, hintsCount): + for x in range(hintsCount): if pageURL.find(self.printhints[x][0]) == -1: continue - print("Trying " + self.printhints[x][0]) + print('Trying ' + self.printhints[x][0]) # Only retrieve the soup if we have a match to check for the # printed article with. soup = self.index_to_soup(pageURL) @@ -96,51 +95,51 @@ class RealClear(BasicNewsRecipe): if len(self.printhints[x][self.phHrefSearch]) > 0 and len(self.printhints[x][self.phLinkText]) == 0: # e.g. RealClear if self.debugMessages is True: - print("Search by href: " + + print('Search by href: ' + self.printhints[x][self.phHrefSearch]) printFind = soup.find(href=re.compile( self.printhints[x][self.phHrefSearch])) elif len(self.printhints[x][3]) > 0 and len(self.printhints[x][1]) == 0: if self.debugMessages is True: - print("Search 1: " + - self.printhints[x][2] + " Attributes: ") + print('Search 1: ' + + self.printhints[x][2] + ' Attributes: ') print(self.printhints[x][3]) printFind = soup.find( self.printhints[x][2], attrs=self.printhints[x][3]) elif len(self.printhints[x][3]) > 0: if self.debugMessages is True: - print("search2") + print('search2') printFind = soup.find(self.printhints[x][2], attrs=self.printhints[ x][3], text=self.printhints[x][1]) else: if self.debugMessages is True: print( - "Default Search: " + self.printhints[x][2] + " Text: " + self.printhints[x][1]) + 'Default Search: ' + self.printhints[x][2] + ' Text: ' + self.printhints[x][1]) printFind = soup.find( self.printhints[x][2], text=self.printhints[x][1]) if printFind is None: if self.debugMessages is True: - print("Not Found") + print('Not Found') # print(soup) - print("end soup\n\n") + print('end soup\n\n') continue print(printFind) if isinstance(printFind, NavigableString) is False: if printFind['href'] is not None: - print("Check " + printFind['href'] + - " for base of " + baseURL) - if printFind['href'].find("http") != 0: + print('Check ' + printFind['href'] + + ' for base of ' + baseURL) + if printFind['href'].find('http') != 0: return baseURL + printFind['href'] return printFind['href'] tag = printFind.parent print(tag) if tag.get('href', None) is None: if self.debugMessages is True: - print("Not in parent, trying skip-up") + print('Not in parent, trying skip-up') if tag.parent['href'] is None: if self.debugMessages is True: - print("Not in skip either, aborting") + print('Not in skip either, aborting') continue return tag.parent['href'] return tag['href'] @@ -148,45 
+147,45 @@ class RealClear(BasicNewsRecipe): def get_browser(self): if self.debugMessages is True: - print("In get_browser") + print('In get_browser') br = BasicNewsRecipe.get_browser(self) return br def parseRSS(self, index): if self.debugMessages is True: - print("\n\nStarting " + self.feedsets[index][0]) + print('\n\nStarting ' + self.feedsets[index][0]) articleList = [] soup = self.index_to_soup(self.feedsets[index][1]) - for div in soup.findAll("item"): - title = div.find("title").contents[0] - urlEl = div.find("originalLink") + for div in soup.findAll('item'): + title = div.find('title').contents[0] + urlEl = div.find('originalLink') if urlEl is None or len(urlEl.contents) == 0: - urlEl = div.find("originallink") + urlEl = div.find('originallink') if urlEl is None or len(urlEl.contents) == 0: - urlEl = div.find("link") + urlEl = div.find('link') if urlEl is None or len(urlEl.contents) == 0: - urlEl = div.find("guid") + urlEl = div.find('guid') if urlEl is None or title is None or len(urlEl.contents) == 0: - print("Error in feed " + self.feedsets[index][0]) + print('Error in feed ' + self.feedsets[index][0]) print(div) continue print(title) print(urlEl) - url = urlEl.contents[0].encode("utf-8") - description = div.find("description") + url = urlEl.contents[0].encode('utf-8') + description = div.find('description') if description is not None and description.contents is not None and len(description.contents) > 0: description = description.contents[0] else: - description = "None" - pubDateEl = div.find("pubDate") + description = 'None' + pubDateEl = div.find('pubDate') if pubDateEl is None: - pubDateEl = div.find("pubdate") + pubDateEl = div.find('pubdate') if pubDateEl is None: pubDate = time.strftime('%a, %d %b') else: pubDate = pubDateEl.contents[0] if self.debugMessages is True: - print("Article") + print('Article') print(title) print(description) print(pubDate) @@ -218,7 +217,7 @@ class RealClear(BasicNewsRecipe): # articleList = [] ans = [] feedsCount = len(self.feedsets) - for x in range(0, feedsCount): # should be ,4 + for x in range(feedsCount): # should be ,4 feedarticles = self.parseRSS(x) if feedarticles is not None: ans.append((self.feedsets[x][0], feedarticles)) diff --git a/recipes/reason_magazine.recipe b/recipes/reason_magazine.recipe index e419dfb9fd..d7d8920043 100644 --- a/recipes/reason_magazine.recipe +++ b/recipes/reason_magazine.recipe @@ -9,7 +9,7 @@ from calibre import prepare_string_for_xml from calibre.web.feeds.news import BasicNewsRecipe, classes -# {{{ parse article JSON +# parse article JSON {{{ def process_image_block(lines, block): caption = block.get('captionText') caption_lines = [] diff --git a/recipes/red_voltaire.recipe b/recipes/red_voltaire.recipe index e77e0c4460..2c5e988b3d 100644 --- a/recipes/red_voltaire.recipe +++ b/recipes/red_voltaire.recipe @@ -18,9 +18,9 @@ class RedVoltaireRecipe(BasicNewsRecipe): masthead_url = u'http://www.voltairenet.org/squelettes/elements/images/logo-voltairenet-org.png' preprocess_regexps = [ - (re.compile(r'(?P<titulo>.+).+
', re.IGNORECASE | re.DOTALL),
         lambda match: '' + match.group('titulo') + ''),
        (re.compile(r'(?P<titulo>.+).+', re.IGNORECASE | re.DOTALL),
         lambda match: '' + match.group('titulo') + ''),
        (re.compile(r'. (?P<fecha>.+).+', re.IGNORECASE | re.DOTALL), lambda match:'' + match.group('fecha') + ''),
    ]
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ … @@ def book_designer_rules():
         (re.compile(r'<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
          lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
-        (re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
+        (re.compile(r'<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
          lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)),
-        (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
+        (re.compile(r'<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
          lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
         ]
     return ans
@@ -458,10 +457,10 @@ class HTMLPreProcessor:
                           re.IGNORECASE).search(src) is not None

     def is_book_designer(self, raw):
-        return re.search('<div[^>]*id=BookTitle', raw) is not None
+        return re.search(r'<div[^>]*id=BookTitle', raw) is not None

     def is_pdftohtml(self, src):
-        return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
+        return "<!-- created by calibre's pdftohtml -->" in src[:1000]

     def __call__(self, html, remove_special_chars=None,
                  get_preprocess_html=False):
@@ -481,7 +480,7 @@ class HTMLPreProcessor:
         start_rules = []

         if not getattr(self.extra_opts, 'keep_ligatures', False):
-            html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)
+            html = _ligpat.sub(lambda m: LIGATURES[m.group()], html)

         user_sr_rules = {}
         # Function for processing search and replace
@@ -528,7 +527,7 @@ class HTMLPreProcessor:
             docanalysis = DocAnalysis('pdf', html)
             length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
             if length:
-                # print("The pdf line length returned is " + str(length))
+                # print('The pdf line length returned is ' + str(length))
                 # unwrap em/en dashes
                 end_rules.append((re.compile(
                    r'(?<=.{%i}[–—])\s*<p>\s*(?=[\[a-z\d])' % length), lambda match: ''))
@@ -617,7 +616,7 @@ class HTMLPreProcessor:
             html = preprocessor(html)

         if is_pdftohtml:
-            html = html.replace('<!-- created by calibre\'s pdftohtml -->', '')
+            html = html.replace("<!-- created by calibre's pdftohtml -->", '')

         if getattr(self.extra_opts, 'smarten_punctuation', False):
             html = smarten_punctuation(html, self.log)
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 5c074a69f1..6e40d32a31 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -27,22 +27,22 @@ class HeuristicProcessor:
         self.chapters_with_title = 0
         self.blanks_deleted = False
         self.blanks_between_paragraphs = False
-        self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
+        self.linereg = re.compile(r'(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
         self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass="(softbreak|whitespace)")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
         self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
         self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}(?!\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}', re.IGNORECASE)
         self.line_open = (
-            r"<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*"
-            r"(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*")
-        self.line_close = "(</(?P=inner3)>)?\\s*(</(?P=inner2)>)?\\s*(</(?P=inner1)>)?\\s*</(?P=outer)>"
+            r'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*'
+            r'(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*')
+        self.line_close = r'(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>'
         self.single_blank = re.compile(r'(\s*<(p|div)[^>]*>\s*</(p|div)>)', re.IGNORECASE)
         self.scene_break_open = '<p class="scenebreak" style="text-align:center; text-indent: 0%; margin-top:1em; margin-bottom:1em; page-break-before:avoid">'
-        self.common_in_text_endings = '[\"\'—’”,\\.!\\?\\…\\)„\\w]'
-        self.common_in_text_beginnings = '[\\w\'\"“‘‛]'
+        self.common_in_text_endings = r'["\'—’”,\.!\?\…\)„\w]'
+        self.common_in_text_beginnings = r'[\w\'"“‘‛]'

     def is_pdftohtml(self, src):
-        return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
+        return "<!-- created by calibre's pdftohtml -->" in src[:1000]

     def is_abbyy(self, src):
         return '<meta name="generator" content="ABBYY FineReader' in src[:1000]
@@ … @@ class HeuristicProcessor:
     def chapter_head(self, match):
         chap = match.group('chap')
         title = match.group('title')
         if not title:
             self.html_preprocess_sections = self.html_preprocess_sections + 1
-            self.log.debug("marked " + str(self.html_preprocess_sections) +
-                           " chapters. - " + str(chap))
+            self.log.debug('marked ' + str(self.html_preprocess_sections) +
+                           ' chapters. - ' + str(chap))
             return '<h2>'+chap+'</h2>\n'
         else:
-            delete_whitespace = re.compile('^\\s*(?P<text>.*?)\\s*$')
-            delete_quotes = re.compile('\'\"')
-            txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g<text>', html2text(chap)))
-            txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g<text>', html2text(title)))
+            delete_whitespace = re.compile(r'^\s*(?P<text>.*?)\s*$')
+            delete_quotes = re.compile(r'\'"')
+            txt_chap = delete_quotes.sub('', delete_whitespace.sub(r'\g<text>', html2text(chap)))
+            txt_title = delete_quotes.sub('', delete_whitespace.sub(r'\g<text>', html2text(title)))
             self.html_preprocess_sections = self.html_preprocess_sections + 1
-            self.log.debug("marked " + str(self.html_preprocess_sections) +
-                           " chapters & titles. - " + str(chap) + ", " + str(title))
+            self.log.debug('marked ' + str(self.html_preprocess_sections) +
+                           ' chapters & titles. - ' + str(chap) + ', ' + str(title))
             return '<h2>'+chap+'</h2>\n<h3>'+title+'</h3>\n'

     def chapter_break(self, match):
         chap = match.group('section')
         styles = match.group('styles')
         self.html_preprocess_sections = self.html_preprocess_sections + 1
-        self.log.debug("marked " + str(self.html_preprocess_sections) +
-                       " section markers based on punctuation. - " + str(chap))
+        self.log.debug('marked ' + str(self.html_preprocess_sections) +
+                       ' section markers based on punctuation. - ' + str(chap))
         return '<'+styles+' style="page-break-before:always">'+chap

     def analyze_title_matches(self, match):
@@ -108,14 +108,14 @@ class HeuristicProcessor:
         inspect. Percent is the minimum percent of line endings which should
         be marked up to return true.
         '''
-        htm_end_ere = re.compile('</p>', re.DOTALL)
-        line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL)
+        htm_end_ere = re.compile(r'</p>', re.DOTALL)
+        line_end_ere = re.compile(r'(\n|\r|\r\n)', re.DOTALL)
         htm_end = htm_end_ere.findall(raw)
         line_end = line_end_ere.findall(raw)
         tot_htm_ends = len(htm_end)
         tot_ln_fds = len(line_end)
         # self.log.debug("There are " + str(tot_ln_fds) + " total Line feeds, and " +
-        #                str(tot_htm_ends) + " marked up endings")
+        #               str(tot_htm_ends) + " marked up endings")

         if percent > 1:
             percent = 1
@@ -208,60 +208,60 @@ class HeuristicProcessor:
         if wordcount > 200000:
             typical_chapters = 15000.
             self.min_chapters = int(ceil(wordcount / typical_chapters))
-        self.log.debug("minimum chapters required are: "+str(self.min_chapters))
-        heading = re.compile('<h[1-6][^>]*>', re.IGNORECASE)
+        self.log.debug('minimum chapters required are: '+str(self.min_chapters))
+        heading = re.compile(r'<h[1-6][^>]*>', re.IGNORECASE)
         self.html_preprocess_sections = len(heading.findall(html))
-        self.log.debug("found " + str(self.html_preprocess_sections) + " pre-existing headings")
+        self.log.debug('found ' + str(self.html_preprocess_sections) + ' pre-existing headings')

         # Build the Regular Expressions in pieces
-        init_lookahead = "(?=<(p|div))"
+        init_lookahead = r'(?=<(p|div))'
         chapter_line_open = self.line_open
-        title_line_open = (r"<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?"
-                           r"\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*")
-        chapter_header_open = r"(?P<chap>"
-        title_header_open = r"(?P<title>"
-        chapter_header_close = ")\\s*"
-        title_header_close = ")"
+        title_line_open = (r'<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?'
+                           r'\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*')
+        chapter_header_open = r'(?P<chap>'
+        title_header_open = r'(?P<title>'
+        chapter_header_close = r')\s*'
+        title_header_close = r')'
         chapter_line_close = self.line_close
-        title_line_close = "(</(?P=inner6)>)?\\s*(</(?P=inner5)>)?\\s*(</(?P=inner4)>)?\\s*</(?P=outer2)>"
+        title_line_close = r'(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>'

         is_pdftohtml = self.is_pdftohtml(html)
         if is_pdftohtml:
-            title_line_open = "<(?P<outer2>p)[^>]*>\\s*"
-            title_line_close = "\\s*</(?P=outer2)>"
+            title_line_open = r'<(?P<outer2>p)[^>]*>\s*'
+            title_line_close = r'\s*</(?P=outer2)>'

         if blanks_between_paragraphs:
-            blank_lines = "(\\s*<p[^>]*>\\s*</p>){0,2}\\s*"
+            blank_lines = r'(\s*<p[^>]*>\s*</p>){0,2}\s*'
         else:
-            blank_lines = ""
-        opt_title_open = "("
-        opt_title_close = ")?"
-        n_lookahead_open = "(?!\\s*"
-        n_lookahead_close = ")\\s*"
+            blank_lines = ''
+        opt_title_open = r'('
+        opt_title_close = r')?'
+ n_lookahead_open = r'(?!\s*' + n_lookahead_close = r')\s*' default_title = r"(<[ibu][^>]*>)?\s{0,3}(?!Chapter)([\w\:\'’\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)" - simple_title = r"(<[ibu][^>]*>)?\s{0,3}(?!(Chapter|\s+<)).{0,65}?(</[ibu][^>]*>)?(?=<)" + simple_title = r'(<[ibu][^>]*>)?\s{0,3}(?!(Chapter|\s+<)).{0,65}?(</[ibu][^>]*>)?(?=<)' analysis_result = [] chapter_types = [ [( r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Epilogue|CHAPTER|Kapitel|Volume\b|Prologue|Book\b|Part\b|Dedication|Preface)" - r"\s*([\d\w-]+\:?\'?\s*){0,5}"), True, True, True, False, "Searching for common section headings", 'common'], + r"\s*([\d\w-]+\:?\'?\s*){0,5}"), True, True, True, False, 'Searching for common section headings', 'common'], # Highest frequency headings which include titles - [r"[^'\"]?(CHAPTER|Kapitel)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, True, True, False, "Searching for most common chapter headings", 'chapter'], - [r"<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#•=]+\s*)+)(\s*(?=[\d.\w#\-*\s]+<)([\d.\w#-*]+\s*){1,5}\s*)(?!\.)(</span>)?\s*</b>", - True, True, True, False, "Searching for emphasized lines", 'emphasized'], # Emphasized lines + [r"[^'\"]?(CHAPTER|Kapitel)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, True, True, False, 'Searching for most common chapter headings', 'chapter'], + [r'<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#•=]+\s*)+)(\s*(?=[\d.\w#\-*\s]+<)([\d.\w#-*]+\s*){1,5}\s*)(?!\.)(</span>)?\s*</b>', + True, True, True, False, 'Searching for emphasized lines', 'emphasized'], # Emphasized lines [r"[^'\"]?(\d+(\.|:))\s*([\w\-\'\"#,]+\s*){0,7}\s*", True, True, True, False, - "Searching for numeric chapter headings", 'numeric'], # Numeric Chapters - [r"([A-Z]\s+){3,}\s*([\d\w-]+\s*){0,3}\s*", True, True, True, False, "Searching for letter spaced headings", 'letter_spaced'], # Spaced Lettering + 'Searching for numeric chapter headings', 'numeric'], # Numeric Chapters + [r'([A-Z]\s+){3,}\s*([\d\w-]+\s*){0,3}\s*', True, True, True, False, 'Searching for letter spaced headings', 'letter_spaced'], # Spaced Lettering [r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*", True, True, True, False, - "Searching for numeric chapters with titles", 'numeric_title'], # Numeric Titles + 'Searching for numeric chapters with titles', 'numeric_title'], # Numeric Titles [r"[^'\"]?(\d+)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, True, True, False, - "Searching for simple numeric headings", 'plain_number'], # Numeric Chapters, no dot or colon + 'Searching for simple numeric headings', 'plain_number'], # Numeric Chapters, no dot or colon [r"\s*[^'\"]?([A-Z#]+(\s|-){0,3}){1,5}\s*", False, True, False, False, - "Searching for chapters with Uppercase Characters", 'uppercase'] # Uppercase Chapters - ] + 'Searching for chapters with Uppercase Characters', 'uppercase'], # Uppercase Chapters + ] def recurse_patterns(html, analyze): # Start with most typical chapter headings, get more aggressive until one works @@ -299,9 +299,9 @@ class HeuristicProcessor: break full_chapter_line = chapter_line_open+chapter_header_open+chapter_type+chapter_header_close+chapter_line_close if n_lookahead_req: - n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line) + n_lookahead = re.sub(r'(ou|in|cha)', 'lookahead_', full_chapter_line) if not analyze: - self.log.debug("Marked " + str(self.html_preprocess_sections) + " headings, " + log_message) + self.log.debug('Marked ' + str(self.html_preprocess_sections) + ' headings, ' + log_message) chapter_marker = 
arg_ignorecase+init_lookahead+full_chapter_line+blank_lines+lp_n_lookahead_open+n_lookahead+lp_n_lookahead_close+ \ lp_opt_title_open+title_line_open+title_header_open+lp_title+title_header_close+title_line_close+lp_opt_title_close @@ -315,10 +315,10 @@ class HeuristicProcessor: title_req = True strict_title = False self.log.debug( - str(type_name)+" had "+str(hits)+ - " hits - "+str(self.chapters_no_title)+" chapters with no title, "+ - str(self.chapters_with_title)+" chapters with titles, "+ - str(float(self.chapters_with_title) / float(hits))+" percent. ") + str(type_name)+' had '+str(hits)+ + ' hits - '+str(self.chapters_no_title)+' chapters with no title, '+ + str(self.chapters_with_title)+' chapters with titles, '+ + str(float(self.chapters_with_title) / float(hits))+' percent. ') if type_name == 'common': analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name]) elif self.min_chapters <= hits < max_chapters or self.min_chapters < 3 > hits: @@ -335,8 +335,8 @@ class HeuristicProcessor: words_per_chptr = wordcount if words_per_chptr > 0 and self.html_preprocess_sections > 0: words_per_chptr = wordcount // self.html_preprocess_sections - self.log.debug("Total wordcount is: "+ str(wordcount)+", Average words per section is: "+ - str(words_per_chptr)+", Marked up "+str(self.html_preprocess_sections)+" chapters") + self.log.debug('Total wordcount is: '+ str(wordcount)+', Average words per section is: '+ + str(words_per_chptr)+', Marked up '+str(self.html_preprocess_sections)+' chapters') return html def punctuation_unwrap(self, length, content, format): @@ -366,13 +366,13 @@ class HeuristicProcessor: # define the pieces of the regex # (?<!\&\w{4});) is a semicolon not part of an entity - lookahead = "(?<=.{"+str(length)+r"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýźâêîôûçąężłıãõñæøþðßěľščťžňďřůёђєіїјљњћўџѣа-я,:)\\IAß]|(?<!\&\w{4});))" - em_en_lookahead = "(?<=.{"+str(length)+"}[\u2013\u2014])" - soft_hyphen = "\xad" - line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?" - blanklines = "\\s*(?P<up2threeblanks><(p|span|div)[^>]*>\\s*(<(p|span|div)[^>]*>\\s*</(span|p|div)>\\s*)</(span|p|div)>\\s*){0,3}\\s*" - line_opening = "<(p|div)[^>]*>\\s*(?P<style_open><(span|[iub])[^>]*>)?\\s*" - txt_line_wrap = "((\u0020|\u0009)*\n){1,4}" + lookahead = '(?<=.{'+str(length)+r'}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýźâêîôûçąężłıãõñæøþðßěľščťžňďřůёђєіїјљњћўџѣа-я,:)\\IAß]|(?<!\&\w{4});))' + em_en_lookahead = '(?<=.{'+str(length)+'}[\u2013\u2014])' + soft_hyphen = '\xad' + line_ending = r'\s*(?P<style_close></(span|[iub])>)?\s*(</(p|div)>)?' 
+ blanklines = r'\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*' + line_opening = r'<(p|div)[^>]*>\s*(?P<style_open><(span|[iub])[^>]*>)?\s*' + txt_line_wrap = r'((\u0020|\u0009)*\n){1,4}' if format == 'txt': unwrap_regex = lookahead+txt_line_wrap @@ -383,9 +383,9 @@ class HeuristicProcessor: em_en_unwrap_regex = em_en_lookahead+line_ending+blanklines+line_opening shy_unwrap_regex = soft_hyphen+line_ending+blanklines+line_opening - unwrap = re.compile("%s" % unwrap_regex, re.UNICODE) - em_en_unwrap = re.compile("%s" % em_en_unwrap_regex, re.UNICODE) - shy_unwrap = re.compile("%s" % shy_unwrap_regex, re.UNICODE) + unwrap = re.compile('%s' % unwrap_regex, re.UNICODE) + em_en_unwrap = re.compile('%s' % em_en_unwrap_regex, re.UNICODE) + shy_unwrap = re.compile('%s' % shy_unwrap_regex, re.UNICODE) if format == 'txt': content = unwrap.sub(' ', content) @@ -408,7 +408,7 @@ class HeuristicProcessor: def markup_pre(self, html): pre = re.compile(r'<pre>', re.IGNORECASE) if len(pre.findall(html)) >= 1: - self.log.debug("Running Text Processing") + self.log.debug('Running Text Processing') outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*?)</pre>', re.IGNORECASE|re.DOTALL) html = outerhtml.sub(self.txt_process, html) from calibre.ebooks.conversion.preprocess import convert_entities @@ -417,20 +417,20 @@ class HeuristicProcessor: # Add markup naively # TODO - find out if there are cases where there are more than one <pre> tag or # other types of unmarked html and handle them in some better fashion - add_markup = re.compile('(?<!>)(\n)') - html = add_markup.sub('</p>\n<p>', html) + add_markup = re.compile(r'(?<!>)(\n)') + html = add_markup.sub(r'</p>\n<p>', html) return html def arrange_htm_line_endings(self, html): - html = re.sub(r"\s*</(?P<tag>p|div)>", "</"+"\\g<tag>"+">\n", html) - html = re.sub(r"\s*<(?P<tag>p|div)(?P<style>[^>]*)>\s*", "\n<"+"\\g<tag>"+"\\g<style>"+">", html) + html = re.sub(r'\s*</(?P<tag>p|div)>', r'</\g<tag>'+'>\n', html) + html = re.sub(r'\s*<(?P<tag>p|div)(?P<style>[^>]*)>\s*', r'\n<\g<tag>\g<style>'+'>', html) return html def fix_nbsp_indents(self, html): txtindent = re.compile(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}', re.IGNORECASE) html = txtindent.sub(self.insert_indent, html) if self.found_indents > 1: - self.log.debug("replaced "+str(self.found_indents)+ " nbsp indents with inline styles") + self.log.debug('replaced '+str(self.found_indents)+ ' nbsp indents with inline styles') return html def cleanup_markup(self, html): @@ -440,18 +440,18 @@ class HeuristicProcessor: # Get rid of empty <o:p> tags to simplify other processing html = re.sub(r'\s*<o:p>\s*</o:p>', ' ', html) # Delete microsoft 'smart' tags - html = re.sub('(?i)</?st1:\\w+>', '', html) + html = re.sub(r'(?i)</?st1:\w+>', '', html) # Re-open self closing paragraph tags - html = re.sub('<p[^>/]*/>', '<p> </p>', html) + html = re.sub(r'<p[^>/]*/>', '<p> </p>', html) # Get rid of empty span, bold, font, em, & italics tags fmt_tags = 'font|[ibu]|em|strong' open_fmt_pat, close_fmt_pat = fr'<(?:{fmt_tags})(?:\s[^>]*)?>', f'</(?:{fmt_tags})>' for i in range(2): - html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html) + html = re.sub(r'\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*', ' ', html) html = re.sub( - r"\s*{open}\s*({open}\s*{close}\s*){{0,2}}\s*{close}".format(open=open_fmt_pat, close=close_fmt_pat) , " ", html) + 
r'\s*{open}\s*({open}\s*{close}\s*){{0,2}}\s*{close}'.format(open=open_fmt_pat, close=close_fmt_pat), ' ', html) # delete surrounding divs from empty paragraphs - html = re.sub('<div[^>]*>\\s*<p[^>]*>\\s*</p>\\s*</div>', '<p> </p>', html) + html = re.sub(r'<div[^>]*>\s*<p[^>]*>\s*</p>\s*</div>', '<p> </p>', html) # Empty heading tags html = re.sub(r'(?i)<h\d+>\s*</h\d+>', '', html) self.deleted_nbsps = True @@ -462,8 +462,8 @@ class HeuristicProcessor: determines the type of html line ending used most commonly in a document use before calling docanalysis functions ''' - paras_reg = re.compile('<p[^>]*>', re.IGNORECASE) - spans_reg = re.compile('<span[^>]*>', re.IGNORECASE) + paras_reg = re.compile(r'<p[^>]*>', re.IGNORECASE) + spans_reg = re.compile(r'<span[^>]*>', re.IGNORECASE) paras = len(paras_reg.findall(html)) spans = len(spans_reg.findall(html)) if spans > 1: @@ -478,8 +478,8 @@ class HeuristicProcessor: blanklines = self.blankreg.findall(html) lines = self.linereg.findall(html) if len(lines) > 1: - self.log.debug("There are " + str(len(blanklines)) + " blank lines. " + - str(float(len(blanklines)) / float(len(lines))) + " percent blank") + self.log.debug('There are ' + str(len(blanklines)) + ' blank lines. ' + + str(float(len(blanklines)) / float(len(lines))) + ' percent blank') if float(len(blanklines)) / float(len(lines)) > 0.40: return True @@ -538,7 +538,7 @@ class HeuristicProcessor: elif content.find('scenebreak') != -1: return content else: - content = re.sub('(?i)<h(?P<hnum>\\d+)[^>]*>', '\n\n<h'+'\\g<hnum>'+' style="'+top_margin+bottom_margin+'">', content) + content = re.sub(r'(?i)<h(?P<hnum>\d+)[^>]*>', r'\n\n<h\g<hnum> style="'+top_margin+bottom_margin+'">', content) return content html = blanks_around_headings.sub(merge_header_whitespace, html) @@ -546,20 +546,20 @@ class HeuristicProcessor: def markup_whitespaces(match): blanks = match.group(0) - blanks = self.blankreg.sub('\n<p class="whitespace" style="text-align:center; margin-top:0em; margin-bottom:0em"> </p>', blanks) + blanks = self.blankreg.sub(r'\n<p class="whitespace" style="text-align:center; margin-top:0em; margin-bottom:0em"> </p>', blanks) return blanks html = blanks_n_nopunct.sub(markup_whitespaces, html) if self.html_preprocess_sections > self.min_chapters: - html = re.sub('(?si)^.*?(?=<h\\d)', markup_whitespaces, html) + html = re.sub(r'(?si)^.*?(?=<h\d)', markup_whitespaces, html) return html def detect_soft_breaks(self, html): - line = '(?P<initline>'+self.line_open+'\\s*(?P<init_content>.*?)'+self.line_close+')' - line_two = '(?P<line_two>'+re.sub('(ou|in|cha)', 'linetwo_', self.line_open)+ \ - '\\s*(?P<line_two_content>.*?)'+re.sub('(ou|in|cha)', 'linetwo_', self.line_close)+')' - div_break_candidate_pattern = line+'\\s*<div[^>]*>\\s*</div>\\s*'+line_two + line = '(?P<initline>'+self.line_open+r'\s*(?P<init_content>.*?)'+self.line_close+')' + line_two = '(?P<line_two>'+re.sub(r'(ou|in|cha)', 'linetwo_', self.line_open)+ \ + r'\s*(?P<line_two_content>.*?)'+re.sub(r'(ou|in|cha)', 'linetwo_', self.line_close)+')' + div_break_candidate_pattern = line+r'\s*<div[^>]*>\s*</div>\s*'+line_two div_break_candidate = re.compile(r'%s' % div_break_candidate_pattern, re.IGNORECASE|re.UNICODE) def convert_div_softbreaks(match): @@ -575,16 +575,16 @@ class HeuristicProcessor: html = div_break_candidate.sub(convert_div_softbreaks, html) if not self.blanks_deleted and self.blanks_between_paragraphs: - html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1em; page-break-before:avoid; 
text-align:center"> </p>', html) + html = self.multi_blank.sub(r'\n<p class="softbreak" style="margin-top:1em; page-break-before:avoid; text-align:center"> </p>', html) else: - html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>', html) + html = self.blankreg.sub(r'\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>', html) return html def detect_scene_breaks(self, html): scene_break_regex = self.line_open+'(?!('+self.common_in_text_beginnings+'|.*?'+self.common_in_text_endings+ \ - '<))(?P<break>((?P<break_char>((?!\\s)\\W))\\s*(?P=break_char)?){1,10})\\s*'+self.line_close + r'<))(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?){1,10})\s*'+self.line_close scene_breaks = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE) - html = scene_breaks.sub(self.scene_break_open+'\\g<break>'+'</p>', html) + html = scene_breaks.sub(self.scene_break_open+r'\g<break></p>', html) return html def markup_user_break(self, replacement_break): @@ -596,54 +596,54 @@ All other html is converted to text. ''' hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em; page-break-before:avoid">' - if re.findall('(<|>)', replacement_break): - if re.match('^<hr', replacement_break): + if re.findall(r'(<|>)', replacement_break): + if re.match(r'^<hr', replacement_break): if replacement_break.find('width') != -1: try: - width = int(re.sub('.*?width(:|=)(?P<wnum>\\d+).*', '\\g<wnum>', replacement_break)) + width = int(re.sub(r'.*?width(:|=)(?P<wnum>\d+).*', r'\g<wnum>', replacement_break)) except: scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>' self.log.warn('Invalid replacement scene break' ' expression, using default') else: - replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break) + replacement_break = re.sub(r'(?i)(width=\d+\%?|width:\s*\d+(\%|px|pt|em)?;?)', '', replacement_break) divpercent = (100 - width) // 2 - hr_open = re.sub('45', str(divpercent), hr_open) + hr_open = re.sub(r'45', str(divpercent), hr_open) scene_break = hr_open+replacement_break+'</div>' else: scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>' - elif re.match('^<img', replacement_break): + elif re.match(r'^<img', replacement_break): scene_break = self.scene_break_open+replacement_break+'</p>' else: from calibre.utils.html2text import html2text replacement_break = html2text(replacement_break) - replacement_break = re.sub('\\s', ' ', replacement_break) + replacement_break = re.sub(r'\s', ' ', replacement_break) scene_break = self.scene_break_open+replacement_break+'</p>' else: - replacement_break = re.sub('\\s', ' ', replacement_break) + replacement_break = re.sub(r'\s', ' ', replacement_break) scene_break = self.scene_break_open+replacement_break+'</p>' return scene_break def check_paragraph(self, content): - content = re.sub('\\s*</?span[^>]*>\\s*', '', content) - if re.match('.*[\"\'.!?:]$', content): - # print "detected this as a paragraph" + content = re.sub(r'\s*</?span[^>]*>\s*', '', content) + if re.match(r'.*["\'.!?:]$', content): + # print('detected this as a paragraph') return True else: return False def abbyy_processor(self, html): - abbyy_line = re.compile('((?P<linestart><p\\sstyle="(?P<styles>[^\"]*?);?">)(?P<content>.*?)(?P<lineend></p>)|(?P<image><img[^>]*>))', re.IGNORECASE) + abbyy_line =
re.compile(r'((?P<linestart><p\sstyle="(?P<styles>[^"]*?);?">)(?P<content>.*?)(?P<lineend></p>)|(?P<image><img[^>]*>))', re.IGNORECASE) empty_paragraph = '\n<p> </p>\n' self.in_blockquote = False self.previous_was_paragraph = False - html = re.sub('</?a[^>]*>', '', html) + html = re.sub(r'</?a[^>]*>', '', html) def convert_styles(match): - # print "raw styles are: "+match.group('styles') + # print('raw styles are: '+match.group('styles')) content = match.group('content') - # print "raw content is: "+match.group('content') + # print('raw content is: '+match.group('content')) image = match.group('image') is_paragraph = False @@ -668,25 +668,25 @@ class HeuristicProcessor: else: styles = match.group('styles').split(';') is_paragraph = self.check_paragraph(content) - # print "styles for this line are: "+str(styles) + # print('styles for this line are: '+str(styles)) split_styles = [] for style in styles: - # print "style is: "+str(style) + # print('style is: '+str(style)) newstyle = style.split(':') - # print "newstyle is: "+str(newstyle) + # print('newstyle is: '+str(newstyle)) split_styles.append(newstyle) styles = split_styles for style, setting in styles: if style == 'text-align' and setting != 'left': text_align = style+':'+setting+';' if style == 'text-indent': - setting = int(re.sub('\\s*pt\\s*', '', setting)) + setting = int(re.sub(r'\s*pt\s*', '', setting)) if 9 < setting < 14: text_indent = indented_text else: text_indent = style+':'+str(setting)+'pt;' if style == 'padding': - setting = re.sub('pt', '', setting).split(' ') + setting = setting.replace('pt', '').split(' ') if int(setting[1]) < 16 and int(setting[3]) < 16: if self.in_blockquote: debugabby = True @@ -710,25 +710,25 @@ class HeuristicProcessor: self.log.debug('padding bottom is: ' + str(setting[2])) self.log.debug('padding left is: ' +str(setting[3])) - # print "text-align is: "+str(text_align) - # print "\n***\nline is:\n "+str(match.group(0))+'\n' + # print('text-align is: '+str(text_align)) + # print('\n***\nline is:\n '+str(match.group(0))+'\n') if debugabby: - # print "this line is a paragraph = "+str(is_paragraph)+", previous line was "+str(self.previous_was_paragraph) - self.log.debug("styles for this line were:", styles) + # print('this line is a paragraph = '+str(is_paragraph)+', previous line was '+str(self.previous_was_paragraph)) + self.log.debug('styles for this line were:', styles) self.log.debug('newline is:') self.log.debug(blockquote_open_loop+blockquote_close_loop+ paragraph_before+'<p style="'+text_indent+text_align+ '">'+content+'</p>'+paragraph_after+'\n\n\n\n\n') - # print "is_paragraph is "+str(is_paragraph)+", previous_was_paragraph is "+str(self.previous_was_paragraph) + # print('is_paragraph is '+str(is_paragraph)+', previous_was_paragraph is '+str(self.previous_was_paragraph)) self.previous_was_paragraph = is_paragraph - # print "previous_was_paragraph is now set to "+str(self.previous_was_paragraph)+"\n\n\n" + # print('previous_was_paragraph is now set to '+str(self.previous_was_paragraph)+'\n\n\n') return blockquote_open_loop+blockquote_close_loop+paragraph_before+'<p style="'+text_indent+text_align+'">'+content+'</p>'+paragraph_after html = abbyy_line.sub(convert_styles, html) return html def __call__(self, html): - self.log.debug("********* Heuristic processing HTML *********") + self.log.debug('********* Heuristic processing HTML *********') # Count the words in the document to estimate how many chapters to look for and whether # other types of processing are attempted try: @@ -737,7 +737,7 
+737,7 @@ class HeuristicProcessor: self.log.warn("Can't get wordcount") if self.totalwords < 50: - self.log.warn("flow is too short, not running heuristics") + self.log.warn('flow is too short, not running heuristics') return html is_abbyy = self.is_abbyy(html) @@ -754,7 +754,7 @@ class HeuristicProcessor: # <pre> tags), check and mark up line endings if required before proceeding # fix indents must run after this step if self.no_markup(html, 0.1): - self.log.debug("not enough paragraph markers, adding now") + self.log.debug('not enough paragraph markers, adding now') # markup using text processing html = self.markup_pre(html) @@ -768,11 +768,11 @@ class HeuristicProcessor: is_pdftohtml = self.is_pdftohtml(html) if is_pdftohtml: - self.line_open = "<(?P<outer>p)[^>]*>(\\s*<[ibu][^>]*>)?\\s*" - self.line_close = "\\s*(</[ibu][^>]*>\\s*)?</(?P=outer)>" + self.line_open = r'<(?P<outer>p)[^>]*>(\s*<[ibu][^>]*>)?\s*' + self.line_close = r'\s*(</[ibu][^>]*>\s*)?</(?P=outer)>' # ADE doesn't render <br />, change to empty paragraphs - # html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html) + # html = re.sub(r'<br[^>]*>', '<p>\u00a0</p>', html) # Determine whether the document uses interleaved blank lines self.blanks_between_paragraphs = self.analyze_blanks(html) @@ -789,9 +789,9 @@ class HeuristicProcessor: # If more than 40% of the lines are empty paragraphs and the user has enabled delete # blank paragraphs then delete blank lines to clean up spacing if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False): - self.log.debug("deleting blank lines") + self.log.debug('deleting blank lines') self.blanks_deleted = True - html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>', html) + html = self.multi_blank.sub(r'\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>', html) html = self.blankreg.sub('', html) # Determine line ending type @@ -804,18 +804,18 @@ class HeuristicProcessor: # more of the lines break in the same region of the document then unwrapping is required docanalysis = DocAnalysis(format, html) hardbreaks = docanalysis.line_histogram(.50) - self.log.debug("Hard line breaks check returned "+str(hardbreaks)) + self.log.debug('Hard line breaks check returned '+str(hardbreaks)) # Calculate Length unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4) length = docanalysis.line_length(unwrap_factor) - self.log.debug("Median line length is " + str(length) + ", calculated with " + format + " format") + self.log.debug('Median line length is ' + str(length) + ', calculated with ' + format + ' format') # ##### Unwrap lines ###### if getattr(self.extra_opts, 'unwrap_lines', False): # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor if hardbreaks or unwrap_factor < 0.4: - self.log.debug("Unwrapping required, unwrapping Lines") + self.log.debug('Unwrapping required, unwrapping Lines') # Dehyphenate with line length limiters dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log) html = dehyphenator(html,'html', length) @@ -823,15 +823,15 @@ class HeuristicProcessor: if getattr(self.extra_opts, 'dehyphenate', False): # dehyphenate in cleanup mode to fix anything previous conversions/editing missed - self.log.debug("Fixing hyphenated content") + self.log.debug('Fixing hyphenated content') dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log) html =
dehyphenator(html,'html_cleanup', length) html = dehyphenator(html, 'individual_words', length) # If still no sections after unwrapping mark split points on lines with no punctuation if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False): - self.log.debug("Looking for more split points based on punctuation," - " currently have " + str(self.html_preprocess_sections)) + self.log.debug('Looking for more split points based on punctuation,' + ' currently have ' + str(self.html_preprocess_sections)) chapdetect3 = re.compile( r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([\W]+\s*)+)' r'(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*' @@ -845,7 +845,7 @@ class HeuristicProcessor: # headings and titles, images, etc doubleheading = re.compile( r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE) - html = doubleheading.sub('\\g<firsthead>'+'\n<h3'+'\\g<secondhead>'+'</h3>', html) + html = doubleheading.sub(r'\g<firsthead>\n<h3\g<secondhead></h3>', html) # If scene break formatting is enabled, find all blank paragraphs that definitely aren't scenebreaks, # style it with the 'whitespace' class. All remaining blank lines are styled as softbreaks. @@ -853,7 +853,7 @@ class HeuristicProcessor: # If non-blank scene breaks exist they are center aligned and styled with appropriate margins. if getattr(self.extra_opts, 'format_scene_breaks', False): self.log.debug('Formatting scene breaks') - html = re.sub('(?i)<div[^>]*>\\s*<br(\\s?/)?>\\s*</div>', '<p></p>', html) + html = re.sub(r'(?i)<div[^>]*>\s*<br(\s?/)?>\s*</div>', '<p></p>', html) html = self.detect_scene_breaks(html) html = self.detect_whitespace(html) html = self.detect_soft_breaks(html) @@ -870,11 +870,11 @@ class HeuristicProcessor: replacement_break = self.markup_user_break(replacement_break) if scene_break_count >= 1: html = detected_scene_break.sub(replacement_break, html) - html = re.sub('<p\\s+class="softbreak"[^>]*>\\s*</p>', replacement_break, html) + html = re.sub(r'<p\s+class="softbreak"[^>]*>\s*</p>', replacement_break, html) else: - html = re.sub('<p\\s+class="softbreak"[^>]*>\\s*</p>', replacement_break, html) + html = re.sub(r'<p\s+class="softbreak"[^>]*>\s*</p>', replacement_break, html) if self.deleted_nbsps: # put back non-breaking spaces in empty paragraphs so they render correctly - html = self.anyblank.sub('\n'+r'\g<openline>'+'\u00a0'+r'\g<closeline>', html) + html = self.anyblank.sub('\n\\g<openline>\u00a0\\g<closeline>', html) return html diff --git a/src/calibre/ebooks/covers.py b/src/calibre/ebooks/covers.py index 048d1f0bef..8b106f7663 100644 --- a/src/calibre/ebooks/covers.py +++ b/src/calibre/ebooks/covers.py @@ -192,7 +192,7 @@ class Block: @position.setter def position(self, new_pos): - (x, y) = new_pos + x, y = new_pos self._position = Point(x, y) if self.layouts: self.layouts[0].setPosition(QPointF(x, y)) @@ -246,8 +246,8 @@ def layout_text(prefs, img, title, subtitle, footer, max_height, style): # }}} -# Format text using templates {{{ + +# Format text using templates {{{ def sanitize(s): return unicodedata.normalize('NFC', clean_xml_chars(clean_ascii_chars(force_unicode(s or '')))) @@ -318,7 +318,7 @@ ColorTheme = namedtuple('ColorTheme', 'color1 color2 contrast_color1 contrast_co def to_theme(x): - return {k:v for k, v in zip(ColorTheme._fields[:4], x.split())} + return dict(zip(ColorTheme._fields[:4], x.split())) fallback_colors =
to_theme('ffffff 000000 000000 ffffff') @@ -355,8 +355,8 @@ def color(color_theme, name): # }}} -# Styles {{{ +# Styles {{{ class Style: @@ -499,7 +499,7 @@ class Ornamental(Style): GUI_NAME = _('Ornamental') # SVG vectors {{{ - CORNER_VECTOR = "m 67.791903,64.260958 c -4.308097,-2.07925 -4.086719,-8.29575 0.334943,-9.40552 4.119758,-1.03399 8.732363,5.05239 5.393055,7.1162 -0.55,0.33992 -1,1.04147 -1,1.55902 0,1.59332 2.597425,1.04548 5.365141,-1.1316 1.999416,-1.57274 2.634859,-2.96609 2.634859,-5.7775 0,-9.55787 -9.827495,-13.42961 -24.43221,-9.62556 -3.218823,0.83839 -5.905663,1.40089 -5.970755,1.25 -0.06509,-0.1509 -0.887601,-1.19493 -1.827799,-2.32007 -1.672708,-2.00174 -1.636693,-2.03722 1.675668,-1.65052 1.861815,0.21736 6.685863,-0.35719 10.720107,-1.27678 12.280767,-2.79934 20.195487,-0.0248 22.846932,8.0092 3.187273,9.65753 -6.423297,17.7497 -15.739941,13.25313 z m 49.881417,-20.53932 c -3.19204,-2.701 -3.72967,-6.67376 -1.24009,-9.16334 2.48236,-2.48236 5.35141,-2.67905 7.51523,-0.51523 1.85966,1.85966 2.07045,6.52954 0.37143,8.22857 -2.04025,2.04024 3.28436,1.44595 6.92316,-0.77272 9.66959,-5.89579 0.88581,-18.22422 -13.0777,-18.35516 -5.28594,-0.0496 -10.31098,1.88721 -14.26764,5.4991 -1.98835,1.81509 -2.16454,1.82692 -2.7936,0.18763 -0.40973,-1.06774 0.12141,-2.82197 1.3628,-4.50104 2.46349,-3.33205 1.67564,-4.01299 -2.891784,-2.49938 -2.85998,0.94777 -3.81038,2.05378 -5.59837,6.51495 -1.184469,2.95536 -3.346819,6.86882 -4.805219,8.69657 -1.4584,1.82776 -2.65164,4.02223 -2.65164,4.87662 0,3.24694 -4.442667,0.59094 -5.872557,-3.51085 -1.361274,-3.90495 0.408198,-8.63869 4.404043,-11.78183 5.155844,-4.05558 1.612374,-3.42079 -9.235926,1.65457 -12.882907,6.02725 -16.864953,7.18038 -24.795556,7.18038 -8.471637,0 -13.38802,-1.64157 -17.634617,-5.88816 -2.832233,-2.83224 -3.849773,-4.81378 -4.418121,-8.6038 -1.946289,-12.9787795 8.03227,-20.91713135 19.767685,-15.7259993 5.547225,2.4538018 6.993631,6.1265383 3.999564,10.1557393 -5.468513,7.35914 -15.917883,-0.19431 -10.657807,-7.7041155 1.486298,-2.1219878 1.441784,-2.2225068 -0.984223,-2.2225068 -1.397511,0 -4.010527,1.3130878 -5.806704,2.9179718 -2.773359,2.4779995 -3.265777,3.5977995 -3.265777,7.4266705 0,5.10943 2.254112,8.84197 7.492986,12.40748 8.921325,6.07175 19.286666,5.61396 37.12088,-1.63946 15.35037,-6.24321 21.294999,-7.42408 34.886123,-6.92999 11.77046,0.4279 19.35803,3.05537 24.34054,8.42878 4.97758,5.3681 2.53939,13.58271 -4.86733,16.39873 -4.17361,1.58681 -11.00702,1.19681 -13.31978,-0.76018 z m 26.50156,-0.0787 c -2.26347,-2.50111 -2.07852,-7.36311 0.39995,-10.51398 2.68134,-3.40877 10.49035,-5.69409 18.87656,-5.52426 l 6.5685,0.13301 -7.84029,0.82767 c -8.47925,0.89511 -12.76997,2.82233 -16.03465,7.20213 -1.92294,2.57976 -1.96722,3.00481 -0.57298,5.5 1.00296,1.79495 2.50427,2.81821 4.46514,3.04333 2.92852,0.33623 2.93789,0.32121 1.08045,-1.73124 -1.53602,-1.69728 -1.64654,-2.34411 -0.61324,-3.58916 2.84565,-3.4288 7.14497,-0.49759 5.03976,3.43603 -1.86726,3.48903 -8.65528,4.21532 -11.3692,1.21647 z m -4.17462,-14.20302 c -0.38836,-0.62838 -0.23556,-1.61305 0.33954,-2.18816 1.3439,-1.34389 4.47714,-0.17168 3.93038,1.47045 -0.5566,1.67168 -3.38637,2.14732 -4.26992,0.71771 z m -8.48037,-9.1829 c -12.462,-4.1101 -12.53952,-4.12156 -25.49998,-3.7694 -24.020921,0.65269 -32.338219,0.31756 -37.082166,-1.49417 -5.113999,-1.95305 -8.192504,-6.3647405 -6.485463,-9.2940713 0.566827,-0.972691 1.020091,-1.181447 1.037211,-0.477701 0.01685,0.692606 1.268676,1.2499998 2.807321,1.2499998 1.685814,0 4.868609,1.571672 
8.10041,4.0000015 4.221481,3.171961 6.182506,3.999221 9.473089,3.996261 l 4.149585,-0.004 -3.249996,-1.98156 c -3.056252,-1.863441 -4.051566,-3.8760635 -2.623216,-5.3044145 0.794,-0.794 6.188222,1.901516 9.064482,4.5295635 1.858669,1.698271 3.461409,1.980521 10.559493,1.859621 11.30984,-0.19266 20.89052,1.29095 31.97905,4.95208 7.63881,2.52213 11.51931,3.16471 22.05074,3.65141 7.02931,0.32486 13.01836,0.97543 13.30902,1.44571 0.29065,0.47029 -5.2356,0.83436 -12.28056,0.80906 -12.25942,-0.044 -13.34537,-0.2229 -25.30902,-4.16865 z" # noqa + CORNER_VECTOR = 'm 67.791903,64.260958 c -4.308097,-2.07925 -4.086719,-8.29575 0.334943,-9.40552 4.119758,-1.03399 8.732363,5.05239 5.393055,7.1162 -0.55,0.33992 -1,1.04147 -1,1.55902 0,1.59332 2.597425,1.04548 5.365141,-1.1316 1.999416,-1.57274 2.634859,-2.96609 2.634859,-5.7775 0,-9.55787 -9.827495,-13.42961 -24.43221,-9.62556 -3.218823,0.83839 -5.905663,1.40089 -5.970755,1.25 -0.06509,-0.1509 -0.887601,-1.19493 -1.827799,-2.32007 -1.672708,-2.00174 -1.636693,-2.03722 1.675668,-1.65052 1.861815,0.21736 6.685863,-0.35719 10.720107,-1.27678 12.280767,-2.79934 20.195487,-0.0248 22.846932,8.0092 3.187273,9.65753 -6.423297,17.7497 -15.739941,13.25313 z m 49.881417,-20.53932 c -3.19204,-2.701 -3.72967,-6.67376 -1.24009,-9.16334 2.48236,-2.48236 5.35141,-2.67905 7.51523,-0.51523 1.85966,1.85966 2.07045,6.52954 0.37143,8.22857 -2.04025,2.04024 3.28436,1.44595 6.92316,-0.77272 9.66959,-5.89579 0.88581,-18.22422 -13.0777,-18.35516 -5.28594,-0.0496 -10.31098,1.88721 -14.26764,5.4991 -1.98835,1.81509 -2.16454,1.82692 -2.7936,0.18763 -0.40973,-1.06774 0.12141,-2.82197 1.3628,-4.50104 2.46349,-3.33205 1.67564,-4.01299 -2.891784,-2.49938 -2.85998,0.94777 -3.81038,2.05378 -5.59837,6.51495 -1.184469,2.95536 -3.346819,6.86882 -4.805219,8.69657 -1.4584,1.82776 -2.65164,4.02223 -2.65164,4.87662 0,3.24694 -4.442667,0.59094 -5.872557,-3.51085 -1.361274,-3.90495 0.408198,-8.63869 4.404043,-11.78183 5.155844,-4.05558 1.612374,-3.42079 -9.235926,1.65457 -12.882907,6.02725 -16.864953,7.18038 -24.795556,7.18038 -8.471637,0 -13.38802,-1.64157 -17.634617,-5.88816 -2.832233,-2.83224 -3.849773,-4.81378 -4.418121,-8.6038 -1.946289,-12.9787795 8.03227,-20.91713135 19.767685,-15.7259993 5.547225,2.4538018 6.993631,6.1265383 3.999564,10.1557393 -5.468513,7.35914 -15.917883,-0.19431 -10.657807,-7.7041155 1.486298,-2.1219878 1.441784,-2.2225068 -0.984223,-2.2225068 -1.397511,0 -4.010527,1.3130878 -5.806704,2.9179718 -2.773359,2.4779995 -3.265777,3.5977995 -3.265777,7.4266705 0,5.10943 2.254112,8.84197 7.492986,12.40748 8.921325,6.07175 19.286666,5.61396 37.12088,-1.63946 15.35037,-6.24321 21.294999,-7.42408 34.886123,-6.92999 11.77046,0.4279 19.35803,3.05537 24.34054,8.42878 4.97758,5.3681 2.53939,13.58271 -4.86733,16.39873 -4.17361,1.58681 -11.00702,1.19681 -13.31978,-0.76018 z m 26.50156,-0.0787 c -2.26347,-2.50111 -2.07852,-7.36311 0.39995,-10.51398 2.68134,-3.40877 10.49035,-5.69409 18.87656,-5.52426 l 6.5685,0.13301 -7.84029,0.82767 c -8.47925,0.89511 -12.76997,2.82233 -16.03465,7.20213 -1.92294,2.57976 -1.96722,3.00481 -0.57298,5.5 1.00296,1.79495 2.50427,2.81821 4.46514,3.04333 2.92852,0.33623 2.93789,0.32121 1.08045,-1.73124 -1.53602,-1.69728 -1.64654,-2.34411 -0.61324,-3.58916 2.84565,-3.4288 7.14497,-0.49759 5.03976,3.43603 -1.86726,3.48903 -8.65528,4.21532 -11.3692,1.21647 z m -4.17462,-14.20302 c -0.38836,-0.62838 -0.23556,-1.61305 0.33954,-2.18816 1.3439,-1.34389 4.47714,-0.17168 3.93038,1.47045 -0.5566,1.67168 -3.38637,2.14732 -4.26992,0.71771 z m -8.48037,-9.1829 c 
-12.462,-4.1101 -12.53952,-4.12156 -25.49998,-3.7694 -24.020921,0.65269 -32.338219,0.31756 -37.082166,-1.49417 -5.113999,-1.95305 -8.192504,-6.3647405 -6.485463,-9.2940713 0.566827,-0.972691 1.020091,-1.181447 1.037211,-0.477701 0.01685,0.692606 1.268676,1.2499998 2.807321,1.2499998 1.685814,0 4.868609,1.571672 8.10041,4.0000015 4.221481,3.171961 6.182506,3.999221 9.473089,3.996261 l 4.149585,-0.004 l -3.249996,-1.98156 c -3.056252,-1.863441 -4.051566,-3.8760635 -2.623216,-5.3044145 0.794,-0.794 6.188222,1.901516 9.064482,4.5295635 1.858669,1.698271 3.461409,1.980521 10.559493,1.859621 11.30984,-0.19266 20.89052,1.29095 31.97905,4.95208 7.63881,2.52213 11.51931,3.16471 22.05074,3.65141 7.02931,0.32486 13.01836,0.97543 13.30902,1.44571 0.29065,0.47029 -5.2356,0.83436 -12.28056,0.80906 -12.25942,-0.044 -13.34537,-0.2229 -25.30902,-4.16865 z' # noqa: E501 # }}} PATH_CACHE = {} VIEWPORT = (400, 500) diff --git a/src/calibre/ebooks/djvu/__init__.py b/src/calibre/ebooks/djvu/__init__.py index 1d58fcfdd7..9280f81b7d 100644 --- a/src/calibre/ebooks/djvu/__init__.py +++ b/src/calibre/ebooks/djvu/__init__.py @@ -8,4 +8,3 @@ __docformat__ = 'restructuredtext en' ''' Used for DJVU input ''' - diff --git a/src/calibre/ebooks/djvu/djvu.py b/src/calibre/ebooks/djvu/djvu.py index df029e5de9..f7124d7349 100644 --- a/src/calibre/ebooks/djvu/djvu.py +++ b/src/calibre/ebooks/djvu/djvu.py @@ -37,7 +37,7 @@ class DjvuChunk: self.dataend = pos + self.size - (8 if inclheader else 0) if self.type == b'FORM': oldpos, pos = pos, pos+4 - # print oldpos, pos + # print(oldpos, pos) self.subtype = buf[oldpos:pos] # self.headersize += 4 self.datastart = pos @@ -109,7 +109,7 @@ class DJVUFile: def __init__(self, instream, verbose=0): self.instream = instream buf = self.instream.read(4) - assert(buf == b'AT&T') + assert buf == b'AT&T' buf = self.instream.read() self.dc = DjvuChunk(buf, 0, len(buf), verbose=verbose) diff --git a/src/calibre/ebooks/djvu/djvubzzdec.py b/src/calibre/ebooks/djvu/djvubzzdec.py index 5748af9682..d518cdee63 100644 --- a/src/calibre/ebooks/djvu/djvubzzdec.py +++ b/src/calibre/ebooks/djvu/djvubzzdec.py @@ -78,15 +78,14 @@ MAXLEN = 1024 ** 2 class BZZDecoderError(Exception): - - """This exception is raised when BZZDecode runs into trouble - """ + '''This exception is raised when BZZDecode runs into trouble + ''' def __init__(self, msg): self.msg = msg def __str__(self): - return "BZZDecoderError: %s" % (self.msg) + return 'BZZDecoderError: %s' % (self.msg) # This table has been designed for the ZPCoder @@ -389,7 +388,7 @@ xmtf = ( # }}} -class BZZDecoder(): +class BZZDecoder: def __init__(self, infile, outfile): self.instream = infile @@ -417,7 +416,7 @@ class BZZDecoder(): # Create machine independent ffz table for i in range(256): j = i - while(j & 0x80): + while j & 0x80: self.ffzt[i] += 1 j <<= 1 # Initialize table @@ -468,7 +467,7 @@ class BZZDecoder(): self.byte = 0xff self.delay -= 1 if self.delay < 1: - raise BZZDecoderError("BiteStream EOF") + raise BZZDecoderError('BiteStream EOF') self.bufint = (self.bufint << 8) | self.byte self.scount += 8 @@ -486,7 +485,7 @@ class BZZDecoder(): if not self.xsize: return 0 if self.xsize > MAXBLOCK * 1024: # 4MB (4096 * 1024) is max block - raise BZZDecoderError("BiteStream.corrupt") + raise BZZDecoderError('BiteStream.corrupt') # Decode Estimation Speed fshift = 0 if self.zpcodec_decoder(): @@ -571,7 +570,7 @@ class BZZDecoder(): # //////// Reconstruct the string if markerpos < 1 or markerpos >= self.xsize: - raise BZZDecoderError("BiteStream.corrupt") +
raise BZZDecoderError('BiteStream.corrupt') # Allocate pointers posn = [0] * self.xsize # Prepare count buffer @@ -602,7 +601,7 @@ class BZZDecoder(): i = count[c] + (n & 0xffffff) # Free and check if i != markerpos: - raise BZZDecoderError("BiteStream.corrupt") + raise BZZDecoderError('BiteStream.corrupt') return self.xsize def decode_raw(self, bits): @@ -733,10 +732,10 @@ def main(): import sys from calibre_extensions import bzzdec as d - with open(sys.argv[1], "rb") as f: + with open(sys.argv[1], 'rb') as f: raw = f.read() print(d.decompress(raw)) -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/src/calibre/ebooks/docx/__init__.py b/src/calibre/ebooks/docx/__init__.py index fd10297f12..fc5e470722 100644 --- a/src/calibre/ebooks/docx/__init__.py +++ b/src/calibre/ebooks/docx/__init__.py @@ -7,4 +7,3 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' class InvalidDOCX(ValueError): pass - diff --git a/src/calibre/ebooks/docx/block_styles.py b/src/calibre/ebooks/docx/block_styles.py index 6e5f548cd8..258a244581 100644 --- a/src/calibre/ebooks/docx/block_styles.py +++ b/src/calibre/ebooks/docx/block_styles.py @@ -88,7 +88,7 @@ LINE_STYLES = { # {{{ 'thick': 'solid', 'thickThinLargeGap': 'double', 'thickThinMediumGap': 'double', - 'thickThinSmallGap' : 'double', + 'thickThinSmallGap': 'double', 'thinThickLargeGap': 'double', 'thinThickMediumGap': 'double', 'thinThickSmallGap': 'double', @@ -128,7 +128,7 @@ def read_single_border(parent, edge, XPath, get): width = min(96, max(2, float(sz))) / 8 except (ValueError, TypeError): pass - return {p:v for p, v in zip(border_props, (padding, width, style, color))} + return dict(zip(border_props, (padding, width, style, color))) def read_border(parent, dest, XPath, get, border_edges=border_edges, name='pBdr'): diff --git a/src/calibre/ebooks/docx/char_styles.py b/src/calibre/ebooks/docx/char_styles.py index 1cc7f5ba2a..8bc673eb91 100644 --- a/src/calibre/ebooks/docx/char_styles.py +++ b/src/calibre/ebooks/docx/char_styles.py @@ -6,7 +6,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' from collections import OrderedDict -from calibre.ebooks.docx.block_styles import LINE_STYLES, binary_property, inherit, read_shd, simple_color, simple_float # noqa +from calibre.ebooks.docx.block_styles import LINE_STYLES, binary_property, inherit, read_shd, simple_color, simple_float # Read from XML {{{ diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py index 5507a20772..a4fb122566 100644 --- a/src/calibre/ebooks/docx/cleanup.py +++ b/src/calibre/ebooks/docx/cleanup.py @@ -231,7 +231,7 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath, uuid): with open(path, 'rb') as imf: fmt, width, height = identify(imf) except: - width, height, fmt = 0, 0, None # noqa + width, height, fmt = 0, 0, None del fmt try: is_cover = 0.8 <= height/width <= 1.8 and height*width >= 160000 diff --git a/src/calibre/ebooks/docx/container.py b/src/calibre/ebooks/docx/container.py index bdea8cdb25..b7157d9749 100644 --- a/src/calibre/ebooks/docx/container.py +++ b/src/calibre/ebooks/docx/container.py @@ -25,8 +25,8 @@ from calibre.utils.zipfile import ZipFile def fromstring(raw, parser=None): return safe_xml_fromstring(raw) -# Read metadata {{{ +# Read metadata {{{ def read_doc_props(raw, mi, XPath): root = fromstring(raw) diff --git a/src/calibre/ebooks/docx/dump.py b/src/calibre/ebooks/docx/dump.py index 2e566571dc..5de53987fd 100644 --- a/src/calibre/ebooks/docx/dump.py +++ 
b/src/calibre/ebooks/docx/dump.py @@ -17,7 +17,7 @@ from calibre.utils.zipfile import ZipFile def pretty_all_xml_in_dir(path): for f in walk(path): - if f.endswith('.xml') or f.endswith('.rels'): + if f.endswith(('.xml', '.rels')): with open(f, 'r+b') as stream: raw = stream.read() if raw: diff --git a/src/calibre/ebooks/docx/fonts.py b/src/calibre/ebooks/docx/fonts.py index b6733323f9..81b5cfc1f9 100644 --- a/src/calibre/ebooks/docx/fonts.py +++ b/src/calibre/ebooks/docx/fonts.py @@ -96,15 +96,15 @@ class Family: SYMBOL_MAPS = { # {{{ - 'Wingdings': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '🖉', '✂', '✁', '👓', '🕭', '🕮', '🕯', '🕿', '✆', '🖂', '🖃', '📪', '📫', '📬', '📭', '🗀', '🗁', '🗎', '🗏', '🗐', '🗄', '⏳', '🖮', '🖰', '🖲', '🖳', '🖴', '🖫', '🖬', '✇', '✍', '🖎', '✌', '🖏', '👍', '👎', '☜', '☞', '☜', '🖗', '🖐', '☺', '😐', '☹', '💣', '🕱', '🏳', '🏱', '✈', '☼', '🌢', '❄', '🕆', '✞', '🕈', '✠', '✡', '☪', '☯', '🕉', '☸', '♈', '♉', '♊', '♋', '♌', '♍', '♎', '♏', '♐', '♑', '♒', '♓', '🙰', '🙵', '⚫', '🔾', '◼', '🞏', '🞐', '❑', '❒', '🞟', '⧫', '◆', '❖', '🞙', '⌧', '⮹', '⌘', '🏵', '🏶', '🙶', '🙷', ' ', '🄋', '➀', '➁', '➂', '➃', '➄', '➅', '➆', '➇', '➈', '➉', '🄌', '➊', '➋', '➌', '➍', '➎', '➏', '➐', '➑', '➒', '➓', '🙢', '🙠', '🙡', '🙣', '🙦', '🙤', '🙥', '🙧', '∙', '•', '⬝', '⭘', '🞆', '🞈', '🞊', '🞋', '🔿', '▪', '🞎', '🟀', '🟁', '★', '🟋', '🟏', '🟓', '🟑', '⯐', '⌖', '⯎', '⯏', '⯑', '✪', '✰', '🕐', '🕑', '🕒', '🕓', '🕔', '🕕', '🕖', '🕗', '🕘', '🕙', '🕚', '🕛', '⮰', '⮱', '⮲', '⮳', '⮴', '⮵', '⮶', '⮷', '🙪', '🙫', '🙕', '🙔', '🙗', '🙖', '🙐', '🙑', '🙒', '🙓', '⌫', '⌦', '⮘', '⮚', '⮙', '⮛', '⮈', '⮊', '⮉', '⮋', '🡨', '🡪', '🡩', '🡫', '🡬', '🡭', '🡯', '🡮', '🡸', '🡺', '🡹', '🡻', '🡼', '🡽', '🡿', '🡾', '⇦', '⇨', '⇧', '⇩', '⬄', '⇳', '⬁', '⬀', '⬃', '⬂', '🢬', '🢭', '🗶', '✓', '🗷', '🗹', ' '), # noqa + 'Wingdings': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '🖉', '✂', '✁', '👓', '🕭', '🕮', '🕯', '🕿', '✆', '🖂', '🖃', '📪', '📫', '📬', '📭', '🗀', '🗁', '🗎', '🗏', '🗐', '🗄', '⏳', '🖮', '🖰', '🖲', '🖳', '🖴', '🖫', '🖬', '✇', '✍', '🖎', '✌', '🖏', '👍', '👎', '☜', '☞', '☜', '🖗', '🖐', '☺', '😐', '☹', '💣', '🕱', '🏳', '🏱', '✈', '☼', '🌢', '❄', '🕆', '✞', '🕈', '✠', '✡', '☪', '☯', '🕉', '☸', '♈', '♉', '♊', '♋', '♌', '♍', '♎', '♏', '♐', '♑', '♒', '♓', '🙰', '🙵', '⚫', '🔾', '◼', '🞏', '🞐', '❑', '❒', '🞟', '⧫', '◆', '❖', '🞙', '⌧', '⮹', '⌘', '🏵', '🏶', '🙶', '🙷', ' ', '🄋', '➀', '➁', '➂', '➃', '➄', '➅', '➆', '➇', '➈', '➉', '🄌', '➊', '➋', '➌', '➍', '➎', '➏', '➐', '➑', '➒', '➓', '🙢', '🙠', '🙡', '🙣', '🙦', '🙤', '🙥', '🙧', '∙', '•', '⬝', '⭘', '🞆', '🞈', '🞊', '🞋', '🔿', '▪', '🞎', '🟀', '🟁', '★', '🟋', '🟏', '🟓', '🟑', '⯐', '⌖', '⯎', '⯏', '⯑', '✪', '✰', '🕐', '🕑', '🕒', '🕓', '🕔', '🕕', '🕖', '🕗', '🕘', '🕙', '🕚', '🕛', '⮰', '⮱', '⮲', '⮳', '⮴', '⮵', '⮶', '⮷', '🙪', '🙫', '🙕', '🙔', '🙗', '🙖', '🙐', '🙑', '🙒', '🙓', '⌫', '⌦', '⮘', '⮚', '⮙', '⮛', '⮈', '⮊', '⮉', '⮋', '🡨', '🡪', '🡩', '🡫', '🡬', '🡭', '🡯', '🡮', '🡸', '🡺', '🡹', '🡻', '🡼', '🡽', '🡿', '🡾', '⇦', '⇨', '⇧', '⇩', '⬄', '⇳', '⬁', '⬀', '⬃', '⬂', '🢬', '🢭', '🗶', '✓', '🗷', '🗹', ' '), # noqa: E501 - 'Wingdings 2': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '🖊', '🖋', '🖌', '🖍', '✄', '✀', '🕾', '🕽', '🗅', '🗆', '🗇', '🗈', '🗉', '🗊', '🗋', '🗌', '🗍', '📋', '🗑', '🗔', '🖵', '🖶', '🖷', '🖸', '🖭', '🖯', '🖱', '🖒', '🖓', '🖘', '🖙', '🖚', '🖛', '👈', '👉', '🖜', '🖝', '🖞', '🖟', '🖠', '🖡', '👆', '👇', 
'🖢', '🖣', '🖑', '🗴', '🗸', '🗵', '☑', '⮽', '☒', '⮾', '⮿', '🛇', '⦸', '🙱', '🙴', '🙲', '🙳', '‽', '🙹', '🙺', '🙻', '🙦', '🙤', '🙥', '🙧', '🙚', '🙘', '🙙', '🙛', '⓪', '①', '②', '③', '④', '⑤', '⑥', '⑦', '⑧', '⑨', '⑩', '⓿', '❶', '❷', '❸', '❹', '❺', '❻', '❼', '❽', '❾', '❿', ' ', '☉', '🌕', '☽', '☾', '⸿', '✝', '🕇', '🕜', '🕝', '🕞', '🕟', '🕠', '🕡', '🕢', '🕣', '🕤', '🕥', '🕦', '🕧', '🙨', '🙩', '⋅', '🞄', '⦁', '●', '●', '🞅', '🞇', '🞉', '⊙', '⦿', '🞌', '🞍', '◾', '■', '□', '🞑', '🞒', '🞓', '🞔', '▣', '🞕', '🞖', '🞗', '🞘', '⬩', '⬥', '◇', '🞚', '◈', '🞛', '🞜', '🞝', '🞞', '⬪', '⬧', '◊', '🞠', '◖', '◗', '⯊', '⯋', '⯀', '⯁', '⬟', '⯂', '⬣', '⬢', '⯃', '⯄', '🞡', '🞢', '🞣', '🞤', '🞥', '🞦', '🞧', '🞨', '🞩', '🞪', '🞫', '🞬', '🞭', '🞮', '🞯', '🞰', '🞱', '🞲', '🞳', '🞴', '🞵', '🞶', '🞷', '🞸', '🞹', '🞺', '🞻', '🞼', '🞽', '🞾', '🞿', '🟀', '🟂', '🟄', '🟆', '🟉', '🟊', '✶', '🟌', '🟎', '🟐', '🟒', '✹', '🟃', '🟇', '✯', '🟍', '🟔', '⯌', '⯍', '※', '⁂', ' ', ' ', ' ', ' ', ' ', ' ',), # noqa + 'Wingdings 2': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '🖊', '🖋', '🖌', '🖍', '✄', '✀', '🕾', '🕽', '🗅', '🗆', '🗇', '🗈', '🗉', '🗊', '🗋', '🗌', '🗍', '📋', '🗑', '🗔', '🖵', '🖶', '🖷', '🖸', '🖭', '🖯', '🖱', '🖒', '🖓', '🖘', '🖙', '🖚', '🖛', '👈', '👉', '🖜', '🖝', '🖞', '🖟', '🖠', '🖡', '👆', '👇', '🖢', '🖣', '🖑', '🗴', '🗸', '🗵', '☑', '⮽', '☒', '⮾', '⮿', '🛇', '⦸', '🙱', '🙴', '🙲', '🙳', '‽', '🙹', '🙺', '🙻', '🙦', '🙤', '🙥', '🙧', '🙚', '🙘', '🙙', '🙛', '⓪', '①', '②', '③', '④', '⑤', '⑥', '⑦', '⑧', '⑨', '⑩', '⓿', '❶', '❷', '❸', '❹', '❺', '❻', '❼', '❽', '❾', '❿', ' ', '☉', '🌕', '☽', '☾', '⸿', '✝', '🕇', '🕜', '🕝', '🕞', '🕟', '🕠', '🕡', '🕢', '🕣', '🕤', '🕥', '🕦', '🕧', '🙨', '🙩', '⋅', '🞄', '⦁', '●', '●', '🞅', '🞇', '🞉', '⊙', '⦿', '🞌', '🞍', '◾', '■', '□', '🞑', '🞒', '🞓', '🞔', '▣', '🞕', '🞖', '🞗', '🞘', '⬩', '⬥', '◇', '🞚', '◈', '🞛', '🞜', '🞝', '🞞', '⬪', '⬧', '◊', '🞠', '◖', '◗', '⯊', '⯋', '⯀', '⯁', '⬟', '⯂', '⬣', '⬢', '⯃', '⯄', '🞡', '🞢', '🞣', '🞤', '🞥', '🞦', '🞧', '🞨', '🞩', '🞪', '🞫', '🞬', '🞭', '🞮', '🞯', '🞰', '🞱', '🞲', '🞳', '🞴', '🞵', '🞶', '🞷', '🞸', '🞹', '🞺', '🞻', '🞼', '🞽', '🞾', '🞿', '🟀', '🟂', '🟄', '🟆', '🟉', '🟊', '✶', '🟌', '🟎', '🟐', '🟒', '✹', '🟃', '🟇', '✯', '🟍', '🟔', '⯌', '⯍', '※', '⁂', ' ', ' ', ' ', ' ', ' ', ' ',), # noqa: E501 - 'Wingdings 3': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '⭠', '⭢', '⭡', '⭣', '⭤', '⭥', '⭧', '⭦', '⭰', '⭲', '⭱', '⭳', '⭶', '⭸', '⭻', '⭽', '⭤', '⭥', '⭪', '⭬', '⭫', '⭭', '⭍', '⮠', '⮡', '⮢', '⮣', '⮤', '⮥', '⮦', '⮧', '⮐', '⮑', '⮒', '⮓', '⮀', '⮃', '⭾', '⭿', '⮄', '⮆', '⮅', '⮇', '⮏', '⮍', '⮎', '⮌', '⭮', '⭯', '⎋', '⌤', '⌃', '⌥', '␣', '⍽', '⇪', '⮸', '🢠', '🢡', '🢢', '🢣', '🢤', '🢥', '🢦', '🢧', '🢨', '🢩', '🢪', '🢫', '🡐', '🡒', '🡑', '🡓', '🡔', '🡕', '🡗', '🡖', '🡘', '🡙', '▲', '▼', '△', '▽', '◀', '▶', '◁', '▷', '◣', '◢', '◤', '◥', '🞀', '🞂', '🞁', ' ', '🞃', '⯅', '⯆', '⯇', '⯈', '⮜', '⮞', '⮝', '⮟', '🠐', '🠒', '🠑', '🠓', '🠔', '🠖', '🠕', '🠗', '🠘', '🠚', '🠙', '🠛', '🠜', '🠞', '🠝', '🠟', '🠀', '🠂', '🠁', '🠃', '🠄', '🠆', '🠅', '🠇', '🠈', '🠊', '🠉', '🠋', '🠠', '🠢', '🠤', '🠦', '🠨', '🠪', '🠬', '🢜', '🢝', '🢞', '🢟', '🠮', '🠰', '🠲', '🠴', '🠶', '🠸', '🠺', '🠹', '🠻', '🢘', '🢚', '🢙', '🢛', '🠼', '🠾', '🠽', '🠿', '🡀', '🡂', '🡁', '🡃', '🡄', '🡆', '🡅', '🡇', '⮨', '⮩', '⮪', '⮫', '⮬', '⮭', '⮮', '⮯', '🡠', '🡢', '🡡', '🡣', '🡤', '🡥', '🡧', '🡦', '🡰', '🡲', '🡱', '🡳', '🡴', '🡵', '🡷', '🡶', '🢀', '🢂', '🢁', '🢃', '🢄', '🢅', '🢇', '🢆', '🢐', '🢒', '🢑', '🢓', '🢔', '🢕', '🢗', '🢖', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',), # noqa + 'Wingdings 3': (' ', ' 
', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '⭠', '⭢', '⭡', '⭣', '⭤', '⭥', '⭧', '⭦', '⭰', '⭲', '⭱', '⭳', '⭶', '⭸', '⭻', '⭽', '⭤', '⭥', '⭪', '⭬', '⭫', '⭭', '⭍', '⮠', '⮡', '⮢', '⮣', '⮤', '⮥', '⮦', '⮧', '⮐', '⮑', '⮒', '⮓', '⮀', '⮃', '⭾', '⭿', '⮄', '⮆', '⮅', '⮇', '⮏', '⮍', '⮎', '⮌', '⭮', '⭯', '⎋', '⌤', '⌃', '⌥', '␣', '⍽', '⇪', '⮸', '🢠', '🢡', '🢢', '🢣', '🢤', '🢥', '🢦', '🢧', '🢨', '🢩', '🢪', '🢫', '🡐', '🡒', '🡑', '🡓', '🡔', '🡕', '🡗', '🡖', '🡘', '🡙', '▲', '▼', '△', '▽', '◀', '▶', '◁', '▷', '◣', '◢', '◤', '◥', '🞀', '🞂', '🞁', ' ', '🞃', '⯅', '⯆', '⯇', '⯈', '⮜', '⮞', '⮝', '⮟', '🠐', '🠒', '🠑', '🠓', '🠔', '🠖', '🠕', '🠗', '🠘', '🠚', '🠙', '🠛', '🠜', '🠞', '🠝', '🠟', '🠀', '🠂', '🠁', '🠃', '🠄', '🠆', '🠅', '🠇', '🠈', '🠊', '🠉', '🠋', '🠠', '🠢', '🠤', '🠦', '🠨', '🠪', '🠬', '🢜', '🢝', '🢞', '🢟', '🠮', '🠰', '🠲', '🠴', '🠶', '🠸', '🠺', '🠹', '🠻', '🢘', '🢚', '🢙', '🢛', '🠼', '🠾', '🠽', '🠿', '🡀', '🡂', '🡁', '🡃', '🡄', '🡆', '🡅', '🡇', '⮨', '⮩', '⮪', '⮫', '⮬', '⮭', '⮮', '⮯', '🡠', '🡢', '🡡', '🡣', '🡤', '🡥', '🡧', '🡦', '🡰', '🡲', '🡱', '🡳', '🡴', '🡵', '🡷', '🡶', '🢀', '🢂', '🢁', '🢃', '🢄', '🢅', '🢇', '🢆', '🢐', '🢒', '🢑', '🢓', '🢔', '🢕', '🢗', '🢖', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',), # noqa: E501 - 'Webdings': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '🕷', '🕸', '🕲', '🕶', '🏆', '🎖', '🖇', '🗨', '🗩', '🗰', '🗱', '🌶', '🎗', '🙾', '🙼', '🗕', '🗖', '🗗', '⏴', '⏵', '⏶', '⏷', '⏪', '⏩', '⏮', '⏭', '⏸', '⏹', '⏺', '🗚', '🗳', '🛠', '🏗', '🏘', '🏙', '🏚', '🏜', '🏭', '🏛', '🏠', '🏖', '🏝', '🛣', '🔍', '🏔', '👁', '👂', '🏞', '🏕', '🛤', '🏟', '🛳', '🕬', '🕫', '🕨', '🔈', '🎔', '🎕', '🗬', '🙽', '🗭', '🗪', '🗫', '⮔', '✔', '🚲', '⬜', '🛡', '📦', '🛱', '⬛', '🚑', '🛈', '🛩', '🛰', '🟈', '🕴', '⬤', '🛥', '🚔', '🗘', '🗙', '❓', '🛲', '🚇', '🚍', '⛳', '⦸', '⊖', '🚭', '🗮', '⏐', '🗯', '🗲', ' ', '🚹', '🚺', '🛉', '🛊', '🚼', '👽', '🏋', '⛷', '🏂', '🏌', '🏊', '🏄', '🏍', '🏎', '🚘', '🗠', '🛢', '📠', '🏷', '📣', '👪', '🗡', '🗢', '🗣', '✯', '🖄', '🖅', '🖃', '🖆', '🖹', '🖺', '🖻', '🕵', '🕰', '🖽', '🖾', '📋', '🗒', '🗓', '🕮', '📚', '🗞', '🗟', '🗃', '🗂', '🖼', '🎭', '🎜', '🎘', '🎙', '🎧', '💿', '🎞', '📷', '🎟', '🎬', '📽', '📹', '📾', '📻', '🎚', '🎛', '📺', '💻', '🖥', '🖦', '🖧', '🍹', '🎮', '🎮', '🕻', '🕼', '🖁', '🖀', '🖨', '🖩', '🖿', '🖪', '🗜', '🔒', '🔓', '🗝', '📥', '📤', '🕳', '🌣', '🌤', '🌥', '🌦', '☁', '🌨', '🌧', '🌩', '🌪', '🌬', '🌫', '🌜', '🌡', '🛋', '🛏', '🍽', '🍸', '🛎', '🛍', 'Ⓟ', '♿', '🛆', '🖈', '🎓', '🗤', '🗥', '🗦', '🗧', '🛪', '🐿', '🐦', '🐟', '🐕', '🐈', '🙬', '🙮', '🙭', '🙯', '🗺', '🌍', '🌏', '🌎', '🕊',), # noqa + 'Webdings': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '🕷', '🕸', '🕲', '🕶', '🏆', '🎖', '🖇', '🗨', '🗩', '🗰', '🗱', '🌶', '🎗', '🙾', '🙼', '🗕', '🗖', '🗗', '⏴', '⏵', '⏶', '⏷', '⏪', '⏩', '⏮', '⏭', '⏸', '⏹', '⏺', '🗚', '🗳', '🛠', '🏗', '🏘', '🏙', '🏚', '🏜', '🏭', '🏛', '🏠', '🏖', '🏝', '🛣', '🔍', '🏔', '👁', '👂', '🏞', '🏕', '🛤', '🏟', '🛳', '🕬', '🕫', '🕨', '🔈', '🎔', '🎕', '🗬', '🙽', '🗭', '🗪', '🗫', '⮔', '✔', '🚲', '⬜', '🛡', '📦', '🛱', '⬛', '🚑', '🛈', '🛩', '🛰', '🟈', '🕴', '⬤', '🛥', '🚔', '🗘', '🗙', '❓', '🛲', '🚇', '🚍', '⛳', '⦸', '⊖', '🚭', '🗮', '⏐', '🗯', '🗲', ' ', '🚹', '🚺', '🛉', '🛊', '🚼', '👽', '🏋', '⛷', '🏂', '🏌', '🏊', '🏄', '🏍', '🏎', '🚘', '🗠', '🛢', '📠', '🏷', '📣', '👪', '🗡', '🗢', '🗣', '✯', '🖄', '🖅', '🖃', '🖆', '🖹', '🖺', '🖻', '🕵', '🕰', '🖽', '🖾', '📋', '🗒', '🗓', '🕮', '📚', '🗞', '🗟', '🗃', '🗂', '🖼', '🎭', '🎜', '🎘', '🎙', '🎧', '💿', '🎞', '📷', '🎟', '🎬', '📽', '📹', '📾', '📻', '🎚', 
'🎛', '📺', '💻', '🖥', '🖦', '🖧', '🍹', '🎮', '🎮', '🕻', '🕼', '🖁', '🖀', '🖨', '🖩', '🖿', '🖪', '🗜', '🔒', '🔓', '🗝', '📥', '📤', '🕳', '🌣', '🌤', '🌥', '🌦', '☁', '🌨', '🌧', '🌩', '🌪', '🌬', '🌫', '🌜', '🌡', '🛋', '🛏', '🍽', '🍸', '🛎', '🛍', 'Ⓟ', '♿', '🛆', '🖈', '🎓', '🗤', '🗥', '🗦', '🗧', '🛪', '🐿', '🐦', '🐟', '🐕', '🐈', '🙬', '🙮', '🙭', '🙯', '🗺', '🌍', '🌏', '🌎', '🕊',), # noqa: E501 - 'Symbol': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '!', '∀', '#', '∃', '%', '&', '∍', '(', ')', '*', '+', ',', '−', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '≅', 'Α', 'Β', 'Χ', 'Δ', 'Ε', 'Φ', 'Γ', 'Η', 'Ι', 'ϑ', 'Λ', 'Μ', 'Ν', 'Ξ', 'Ο', 'Π', 'Θ', 'Ρ', 'Σ', 'Τ', 'Υ', 'ς', 'Ω', 'Ξ', 'Ψ', 'Ζ', '[', '∴', ']', '⊥', '_', '', 'α', 'β', 'χ', 'δ', 'ε', 'φ', 'γ', 'η', 'ι', 'ϕ', 'λ', 'μ', 'ν', 'ξ', 'ο', 'π', 'θ', 'ρ', 'σ', 'τ', 'υ', 'ϖ', 'ω', 'ξ', 'ψ', 'ζ', '{', '|', '}', '~', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '€', 'ϒ', '′', '≤', '⁄', '∞', 'ƒ', '♣', '♥', '♦', '♠', '↔', '←', '↑', '→', '↓', '°', '±', '″', '≥', '×', '∝', '∂', '•', '÷', '≠', '≡', '≈', '…', '⏐', '⎯', '↲', 'ℵ', 'ℑ', 'ℜ', '℘', '⊗', '⊕', '∅', '∩', '∪', '⊃', '⊇', '⊄', '⊂', '⊆', '∈', '∉', '∠', '∂', '®', '©', '™', '∏', '√', '⋅', '¬', '∦', '∧', '⇔', '⇐', '⇑', '⇒', '⇓', '◊', '〈', '®', '©', '™', '∑', '⎛', '⎜', '⎝', '⎡', '⎢', '⎣', '⎧', '⎨', '⎩', '⎪', ' ', '〉', '∫', '⌠', '⎮', '⌡', '⎞', '⎟', '⎠', '⎤', '⎥', '⎦', '⎪', '⎫', '⎬', ' ',), # noqa + 'Symbol': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '!', '∀', '#', '∃', '%', '&', '∍', '(', ')', '*', '+', ',', '−', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '≅', 'Α', 'Β', 'Χ', 'Δ', 'Ε', 'Φ', 'Γ', 'Η', 'Ι', 'ϑ', 'Λ', 'Μ', 'Ν', 'Ξ', 'Ο', 'Π', 'Θ', 'Ρ', 'Σ', 'Τ', 'Υ', 'ς', 'Ω', 'Ξ', 'Ψ', 'Ζ', '[', '∴', ']', '⊥', '_', '', 'α', 'β', 'χ', 'δ', 'ε', 'φ', 'γ', 'η', 'ι', 'ϕ', 'λ', 'μ', 'ν', 'ξ', 'ο', 'π', 'θ', 'ρ', 'σ', 'τ', 'υ', 'ϖ', 'ω', 'ξ', 'ψ', 'ζ', '{', '|', '}', '~', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '€', 'ϒ', '′', '≤', '⁄', '∞', 'ƒ', '♣', '♥', '♦', '♠', '↔', '←', '↑', '→', '↓', '°', '±', '″', '≥', '×', '∝', '∂', '•', '÷', '≠', '≡', '≈', '…', '⏐', '⎯', '↲', 'ℵ', 'ℑ', 'ℜ', '℘', '⊗', '⊕', '∅', '∩', '∪', '⊃', '⊇', '⊄', '⊂', '⊆', '∈', '∉', '∠', '∂', '®', '©', '™', '∏', '√', '⋅', '¬', '∦', '∧', '⇔', '⇐', '⇑', '⇒', '⇓', '◊', '〈', '®', '©', '™', '∑', '⎛', '⎜', '⎝', '⎡', '⎢', '⎣', '⎧', '⎨', '⎩', '⎪', ' ', '〉', '∫', '⌠', '⎮', '⌡', '⎞', '⎟', '⎠', '⎤', '⎥', '⎦', '⎪', '⎫', '⎬', ' ',), # noqa: E501 } # }}} SYMBOL_FONT_NAMES = frozenset(n.lower() for n in SYMBOL_MAPS) diff --git a/src/calibre/ebooks/docx/index.py b/src/calibre/ebooks/docx/index.py index a00b158392..39d9913989 100644 --- a/src/calibre/ebooks/docx/index.py +++ b/src/calibre/ebooks/docx/index.py @@ -106,7 +106,7 @@ def process_index(field, index, xe_fields, log, XPath, expand): if styles: heading_style = styles[0] else: - items = sorted(xe_fields, key=lambda x:sort_key(x['text'])) + items = sorted(xe_fields, key=lambda x: sort_key(x['text'])) hyperlinks = [] blocks = [] @@ -143,7 +143,7 @@ def split_up_block(block, a, 
text, parts, ldict): ldict[span] = len(prefix) -""" +''' The merge algorithm is a little tricky. We start with a list of elementary blocks. Each is an HtmlElement, a p node with a list of child nodes. The last child may be a link, and the earlier ones are @@ -175,7 +175,7 @@ If we find such a matching entry, go back to the start with (p ... pk+1) and (n If there is no matching entry, then because of the original reversed order we want to insert nk+1 and all following entries from n into p immediately following pk. -""" +''' def find_match(prev_block, pind, nextent, ldict): @@ -208,7 +208,7 @@ def add_link(pent, nent, ldict): p.insert(p.index(pa) + 1, na) else: # substitute link na for plain text in pent - pent.text = "" + pent.text = '' pent.append(na) diff --git a/src/calibre/ebooks/docx/names.py b/src/calibre/ebooks/docx/names.py index 702177f2b4..34c1d9951d 100644 --- a/src/calibre/ebooks/docx/names.py +++ b/src/calibre/ebooks/docx/names.py @@ -13,24 +13,24 @@ from polyglot.builtins import iteritems # Names {{{ TRANSITIONAL_NAMES = { - 'DOCUMENT' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument', - 'DOCPROPS' : 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties', - 'APPPROPS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties', - 'STYLES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles', - 'NUMBERING' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering', - 'FONTS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable', - 'EMBEDDED_FONT' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/font', - 'IMAGES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image', - 'LINKS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink', - 'FOOTNOTES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes', - 'ENDNOTES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes', - 'THEMES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme', - 'SETTINGS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings', + 'DOCUMENT' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument', + 'DOCPROPS' : 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties', + 'APPPROPS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties', + 'STYLES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles', + 'NUMBERING' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering', + 'FONTS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable', + 'EMBEDDED_FONT': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/font', + 'IMAGES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image', + 'LINKS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink', + 'FOOTNOTES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes', + 'ENDNOTES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes', + 'THEMES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme', + 'SETTINGS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings', 
'WEB_SETTINGS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings', } STRICT_NAMES = { - k:v.replace('http://schemas.openxmlformats.org/officeDocument/2006', 'http://purl.oclc.org/ooxml/officeDocument') + k:v.replace('http://schemas.openxmlformats.org/officeDocument/2006', 'http://purl.oclc.org/ooxml/officeDocument') for k, v in iteritems(TRANSITIONAL_NAMES) } diff --git a/src/calibre/ebooks/docx/numbering.py b/src/calibre/ebooks/docx/numbering.py index a7cf7451bf..37138e3a96 100644 --- a/src/calibre/ebooks/docx/numbering.py +++ b/src/calibre/ebooks/docx/numbering.py @@ -39,7 +39,7 @@ def alphabet(val, lower=True): alphabet_map = { 'lower-alpha':alphabet, 'upper-alpha':partial(alphabet, lower=False), - 'lower-roman':lambda x:roman(x).lower(), 'upper-roman':roman, + 'lower-roman':lambda x: roman(x).lower(), 'upper-roman':roman, 'decimal-leading-zero': lambda x: '0%d' % x } @@ -361,7 +361,7 @@ class Numbering: if child.tag == 'li': if current_run: last = current_run[-1] - if (last.get('list-id') , last.get('list-lvl')) != (child.get('list-id'), child.get('list-lvl')): + if (last.get('list-id'), last.get('list-lvl')) != (child.get('list-id'), child.get('list-lvl')): commit(current_run) current_run.append(child) else: diff --git a/src/calibre/ebooks/docx/settings.py b/src/calibre/ebooks/docx/settings.py index d0a6823947..5276e7b517 100644 --- a/src/calibre/ebooks/docx/settings.py +++ b/src/calibre/ebooks/docx/settings.py @@ -17,4 +17,3 @@ class Settings: self.default_tab_stop = int(self.namespace.get(dts, 'w:val')) / 20 except (ValueError, TypeError, AttributeError): pass - diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py index fe48b380a7..ed7aea361a 100644 --- a/src/calibre/ebooks/docx/styles.py +++ b/src/calibre/ebooks/docx/styles.py @@ -14,7 +14,6 @@ from polyglot.builtins import iteritems, itervalues class PageProperties: - ''' Class representing page level properties (page size/margins) read from sectPr elements. @@ -104,7 +103,6 @@ class Style: class Styles: - ''' Collection of all styles defined in the document. Used to get the final styles applicable to elements in the document markup. 
''' @@ -501,7 +499,7 @@ class Styles: prefix = ef + '\n' + prefix ans = [] - for (cls, css) in sorted(itervalues(self.classes), key=lambda x:x[0]): + for cls, css in sorted(itervalues(self.classes), key=lambda x:x[0]): b = (f'\t{k}: {v};' for k, v in iteritems(css)) b = '\n'.join(b) ans.append('.{} {{\n{}\n}}\n'.format(cls, b.rstrip(';'))) diff --git a/src/calibre/ebooks/docx/tables.py b/src/calibre/ebooks/docx/tables.py index 11e657e2a9..1a00af4456 100644 --- a/src/calibre/ebooks/docx/tables.py +++ b/src/calibre/ebooks/docx/tables.py @@ -280,7 +280,7 @@ class CellStyle(Style): for x in edges: val = getattr(self, 'cell_padding_%s' % x) if val not in (inherit, 'auto'): - c['padding-%s' % x] = val + c['padding-%s' % x] = val elif val is inherit and x in {'left', 'right'}: c['padding-%s' % x] = '%.3gpt' % (115/20) # In Word, tables are apparently rendered with some default top and diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 54b9578573..4560006c30 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -683,7 +683,7 @@ class Convert: multi_spaces = self.ms_pat.search(ctext) is not None preserve = multi_spaces or self.ws_pat.search(ctext) is not None if preserve: - text.add_elem(SPAN(ctext, style="white-space:pre-wrap")) + text.add_elem(SPAN(ctext, style='white-space:pre-wrap')) ans.append(text.elem) else: text.buf.append(ctext) diff --git a/src/calibre/ebooks/docx/writer/__init__.py b/src/calibre/ebooks/docx/writer/__init__.py index 0cbf055ec8..fb2e4b74e1 100644 --- a/src/calibre/ebooks/docx/writer/__init__.py +++ b/src/calibre/ebooks/docx/writer/__init__.py @@ -3,6 +3,3 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' - - - diff --git a/src/calibre/ebooks/docx/writer/container.py b/src/calibre/ebooks/docx/writer/container.py index 0b8bff1917..03f2ea90e7 100644 --- a/src/calibre/ebooks/docx/writer/container.py +++ b/src/calibre/ebooks/docx/writer/container.py @@ -70,7 +70,7 @@ def create_skeleton(opts, namespaces=None): E.pgSz(**{w('w'):str(width), w('h'):str(height)}), E.pgMar(**dict(map(margin, 'left top right bottom'.split()))), E.cols(**{w('space'):'720'}), - E.docGrid(**{w('linePitch'):"360"}), + E.docGrid(**{w('linePitch'):'360'}), )) dn = {k:v for k, v in iteritems(namespaces) if k in tuple('wra') + ('wp',)} @@ -79,15 +79,15 @@ def create_skeleton(opts, namespaces=None): E.docDefaults( E.rPrDefault( E.rPr( - E.rFonts(**{w('asciiTheme'):"minorHAnsi", w('eastAsiaTheme'):"minorEastAsia", w('hAnsiTheme'):"minorHAnsi", w('cstheme'):"minorBidi"}), + E.rFonts(**{w('asciiTheme'):'minorHAnsi', w('eastAsiaTheme'):'minorEastAsia', w('hAnsiTheme'):'minorHAnsi', w('cstheme'):'minorBidi'}), E.sz(**{w('val'):'22'}), E.szCs(**{w('val'):'22'}), - E.lang(**{w('val'):'en-US', w('eastAsia'):"en-US", w('bidi'):"ar-SA"}) + E.lang(**{w('val'):'en-US', w('eastAsia'):'en-US', w('bidi'):'ar-SA'}) ) ), E.pPrDefault( E.pPr( - E.spacing(**{w('after'):"0", w('line'):"276", w('lineRule'):"auto"}) + E.spacing(**{w('after'):'0', w('line'):'276', w('lineRule'):'auto'}) ) ) ) @@ -173,25 +173,25 @@ class DOCX: E = ElementMaker(namespace=self.namespace.namespaces['ct'], nsmap={None:self.namespace.namespaces['ct']}) types = E.Types() for partname, mt in iteritems({ - "/word/footnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml", - "/word/document.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml", - "/word/numbering.xml": 
"application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml", - "/word/styles.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml", - "/word/endnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml", - "/word/settings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml", - "/word/theme/theme1.xml": "application/vnd.openxmlformats-officedocument.theme+xml", - "/word/fontTable.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml", - "/word/webSettings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml", - "/docProps/core.xml": "application/vnd.openxmlformats-package.core-properties+xml", - "/docProps/app.xml": "application/vnd.openxmlformats-officedocument.extended-properties+xml", + '/word/footnotes.xml': 'application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml', + '/word/document.xml': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml', + '/word/numbering.xml': 'application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml', + '/word/styles.xml': 'application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml', + '/word/endnotes.xml': 'application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml', + '/word/settings.xml': 'application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml', + '/word/theme/theme1.xml': 'application/vnd.openxmlformats-officedocument.theme+xml', + '/word/fontTable.xml': 'application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml', + '/word/webSettings.xml': 'application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml', + '/docProps/core.xml': 'application/vnd.openxmlformats-package.core-properties+xml', + '/docProps/app.xml': 'application/vnd.openxmlformats-officedocument.extended-properties+xml', }): types.append(E.Override(PartName=partname, ContentType=mt)) added = {'png', 'gif', 'jpeg', 'jpg', 'svg', 'xml'} for ext in added: types.append(E.Default(Extension=ext, ContentType=guess_type('a.'+ext)[0])) for ext, mt in iteritems({ - "rels": "application/vnd.openxmlformats-package.relationships+xml", - "odttf": "application/vnd.openxmlformats-officedocument.obfuscatedFont", + 'rels': 'application/vnd.openxmlformats-package.relationships+xml', + 'odttf': 'application/vnd.openxmlformats-officedocument.obfuscatedFont', }): added.add(ext) types.append(E.Default(Extension=ext, ContentType=mt)) @@ -242,7 +242,7 @@ class DOCX: def convert_metadata(self, mi): namespaces = self.namespace.namespaces E = ElementMaker(namespace=namespaces['cp'], nsmap={x:namespaces[x] for x in 'cp dc dcterms xsi'.split()}) - cp = E.coreProperties(E.revision("1"), E.lastModifiedBy('calibre')) + cp = E.coreProperties(E.revision('1'), E.lastModifiedBy('calibre')) ts = utcnow().isoformat(native_string_type('T')).rpartition('.')[0] + 'Z' for x in 'created modified'.split(): x = cp.makeelement('{{{}}}{}'.format(namespaces['dcterms'], x), **{'{%s}type' % namespaces['xsi']:'dcterms:W3CDTF'}) diff --git a/src/calibre/ebooks/docx/writer/fonts.py b/src/calibre/ebooks/docx/writer/fonts.py index de02ad4eec..22e3989f2c 100644 --- a/src/calibre/ebooks/docx/writer/fonts.py +++ b/src/calibre/ebooks/docx/writer/fonts.py @@ -73,4 +73,4 @@ class FontsManager: font_data_map['word/' + fname] = obfuscate_font_data(item.data, key) makeelement(font, 'w:embed' + tag, r_id=rid, w_fontKey='{%s}' % 
key.urn.rpartition(':')[-1].upper(), - w_subsetted="true" if self.opts.subset_embedded_fonts else "false") + w_subsetted='true' if self.opts.subset_embedded_fonts else 'false') diff --git a/src/calibre/ebooks/docx/writer/from_html.py b/src/calibre/ebooks/docx/writer/from_html.py index b82e6ca7aa..c0b53ad756 100644 --- a/src/calibre/ebooks/docx/writer/from_html.py +++ b/src/calibre/ebooks/docx/writer/from_html.py @@ -62,7 +62,7 @@ class TextRun: self.first_html_parent = first_html_parent if self.ws_pat is None: TextRun.ws_pat = self.ws_pat = re.compile(r'\s+') - TextRun.soft_hyphen_pat = self.soft_hyphen_pat = re.compile('(\u00ad)') + TextRun.soft_hyphen_pat = self.soft_hyphen_pat = re.compile(r'(\u00ad)') self.style = style self.texts = [] self.link = None diff --git a/src/calibre/ebooks/docx/writer/images.py b/src/calibre/ebooks/docx/writer/images.py index 5e4d6d946d..214c014efc 100644 --- a/src/calibre/ebooks/docx/writer/images.py +++ b/src/calibre/ebooks/docx/writer/images.py @@ -145,8 +145,8 @@ class ImagesManager: parent = makeelement(ans, 'wp:anchor', **get_image_margins(style)) # The next three lines are boilerplate that Word requires, even # though the DOCX specs define defaults for all of them - parent.set('simplePos', '0'), parent.set('relativeHeight', '1'), parent.set('behindDoc',"0"), parent.set('locked', "0") - parent.set('layoutInCell', "1"), parent.set('allowOverlap', '1') + parent.set('simplePos', '0'), parent.set('relativeHeight', '1'), parent.set('behindDoc','0'), parent.set('locked', '0') + parent.set('layoutInCell', '1'), parent.set('allowOverlap', '1') makeelement(parent, 'wp:simplePos', x='0', y='0') makeelement(makeelement(parent, 'wp:positionH', relativeFrom='margin'), 'wp:align').text = floating makeelement(makeelement(parent, 'wp:positionV', relativeFrom='line'), 'wp:align').text = 'top' @@ -169,7 +169,7 @@ class ImagesManager: def create_docx_image_markup(self, parent, name, alt, img_rid, width, height, svg_rid=''): makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces makeelement(parent, 'wp:docPr', id=str(self.count), name=name, descr=alt) - makeelement(makeelement(parent, 'wp:cNvGraphicFramePr'), 'a:graphicFrameLocks', noChangeAspect="1") + makeelement(makeelement(parent, 'wp:cNvGraphicFramePr'), 'a:graphicFrameLocks', noChangeAspect='1') g = makeelement(parent, 'a:graphic') gd = makeelement(g, 'a:graphicData', uri=namespaces['pic']) pic = makeelement(gd, 'pic:pic') @@ -231,8 +231,8 @@ class ImagesManager: root = etree.Element('root', nsmap=namespaces) ans = makeelement(root, 'w:drawing', append=False) parent = makeelement(ans, 'wp:anchor', **{'dist'+edge:'0' for edge in 'LRTB'}) - parent.set('simplePos', '0'), parent.set('relativeHeight', '1'), parent.set('behindDoc',"0"), parent.set('locked', "0") - parent.set('layoutInCell', "1"), parent.set('allowOverlap', '1') + parent.set('simplePos', '0'), parent.set('relativeHeight', '1'), parent.set('behindDoc','0'), parent.set('locked', '0') + parent.set('layoutInCell', '1'), parent.set('allowOverlap', '1') makeelement(parent, 'wp:simplePos', x='0', y='0') makeelement(makeelement(parent, 'wp:positionH', relativeFrom='page'), 'wp:align').text = 'center' makeelement(makeelement(parent, 'wp:positionV', relativeFrom='page'), 'wp:align').text = 'center' diff --git a/src/calibre/ebooks/docx/writer/links.py b/src/calibre/ebooks/docx/writer/links.py index 647b06298e..b34c2c215f 100644 --- a/src/calibre/ebooks/docx/writer/links.py +++ 
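For context on the w_fontKey expression just re-quoted in fonts.py: the obfuscation key is a UUID URN, and Word expects the bare GUID in braces, upper-cased. A small sketch of the derivation, using a freshly generated UUID as a stand-in for calibre's per-document key (an assumption for illustration only):

    import uuid

    key = uuid.uuid4()   # stand-in for the real obfuscation key
    font_key = '{%s}' % key.urn.rpartition(':')[-1].upper()
    print(font_key)      # the GUID in braces, upper-cased, as w:fontKey expects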
b/src/calibre/ebooks/docx/writer/links.py @@ -36,7 +36,7 @@ class TOCItem: def serialize(self, body, makeelement): p = makeelement(body, 'w:p', append=False) ppr = makeelement(p, 'w:pPr') - makeelement(ppr, 'w:pStyle', w_val="Normal") + makeelement(ppr, 'w:pStyle', w_val='Normal') makeelement(ppr, 'w:ind', w_left='0', w_firstLineChars='0', w_firstLine='0', w_leftChars=str(200 * self.level)) if self.is_first: makeelement(ppr, 'w:pageBreakBefore', w_val='off') diff --git a/src/calibre/ebooks/docx/writer/lists.py b/src/calibre/ebooks/docx/writer/lists.py index 0859530d55..aced84081b 100644 --- a/src/calibre/ebooks/docx/writer/lists.py +++ b/src/calibre/ebooks/docx/writer/lists.py @@ -121,7 +121,7 @@ class Level: makeelement(makeelement(lvl, 'w:pPr'), 'w:ind', w_hanging='360', w_left=str(1152 + self.ilvl * 360)) if self.num_fmt == 'bullet': ff = {'\uf0b7':'Symbol', '\uf0a7':'Wingdings'}.get(self.lvl_text, 'Courier New') - makeelement(makeelement(lvl, 'w:rPr'), 'w:rFonts', w_ascii=ff, w_hAnsi=ff, w_hint="default") + makeelement(makeelement(lvl, 'w:rPr'), 'w:rFonts', w_ascii=ff, w_hAnsi=ff, w_hint='default') class ListsManager: diff --git a/src/calibre/ebooks/docx/writer/styles.py b/src/calibre/ebooks/docx/writer/styles.py index 73f85d49eb..5b299c4076 100644 --- a/src/calibre/ebooks/docx/writer/styles.py +++ b/src/calibre/ebooks/docx/writer/styles.py @@ -199,6 +199,7 @@ LINE_STYLES = { 'outset': 'outset', } + def convert_underline(items): style = 'solid' has_underline = False @@ -478,9 +479,9 @@ def read_css_block_borders(self, css, store_css_style=False): setattr(self, 'css_margin_' + edge, '') setattr(self, 'border_%s_width' % edge, 2) setattr(self, 'border_%s_color' % edge, None) - setattr(self, 'border_%s_style' % edge, 'none') + setattr(self, 'border_%s_style' % edge, 'none') if store_css_style: - setattr(self, 'border_%s_css_style' % edge, 'none') + setattr(self, 'border_%s_css_style' % edge, 'none') else: # In DOCX padding can only be a positive integer try: @@ -499,9 +500,9 @@ def read_css_block_borders(self, css, store_css_style=False): val = min(96, max(2, int(val * 8))) setattr(self, 'border_%s_width' % edge, val) setattr(self, 'border_%s_color' % edge, convert_color(css['border-%s-color' % edge]) or 'auto') - setattr(self, 'border_%s_style' % edge, LINE_STYLES.get(css['border-%s-style' % edge].lower(), 'none')) + setattr(self, 'border_%s_style' % edge, LINE_STYLES.get(css['border-%s-style' % edge].lower(), 'none')) if store_css_style: - setattr(self, 'border_%s_css_style' % edge, css['border-%s-style' % edge].lower()) + setattr(self, 'border_%s_css_style' % edge, css['border-%s-style' % edge].lower()) class BlockStyle(DOCXStyle): diff --git a/src/calibre/ebooks/docx/writer/tables.py b/src/calibre/ebooks/docx/writer/tables.py index 3f65d6a381..7f2e83b28c 100644 --- a/src/calibre/ebooks/docx/writer/tables.py +++ b/src/calibre/ebooks/docx/writer/tables.py @@ -121,7 +121,7 @@ class Cell: # cell level bc = self.background_color or self.row.background_color or self.row.table.background_color if bc: - makeelement(tcPr, 'w:shd', w_val="clear", w_color="auto", w_fill=bc) + makeelement(tcPr, 'w:shd', w_val='clear', w_color='auto', w_fill=bc) b = makeelement(tcPr, 'w:tcBorders', append=False) for edge, border in iteritems(self.borders): diff --git a/src/calibre/ebooks/docx/writer/utils.py b/src/calibre/ebooks/docx/writer/utils.py index e0ff6d9f66..1f7d5f6817 100644 --- a/src/calibre/ebooks/docx/writer/utils.py +++ b/src/calibre/ebooks/docx/writer/utils.py @@ -13,8 +13,8 @@ def int_or_zero(raw): 
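One detail worth spelling out from the read_css_block_borders hunk above: DOCX measures border widths in eighths of a point and only accepts values from 2 to 96, hence the min/max clamp around int(val * 8). The same conversion in isolation (helper name is illustrative):

    def css_pt_to_docx_border(width_pt):
        # eighths of a point, clamped to the range DOCX accepts
        return min(96, max(2, int(width_pt * 8)))

    assert css_pt_to_docx_border(1.0) == 8    # 1pt
    assert css_pt_to_docx_border(0.1) == 2    # too thin, clamped up
    assert css_pt_to_docx_border(50) == 96    # too thick, clamped down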
except (ValueError, TypeError, AttributeError): return 0 -# convert_color() {{{ +# convert_color() {{{ def convert_color(value): if not value: diff --git a/src/calibre/ebooks/epub/cfi/parse.py b/src/calibre/ebooks/epub/cfi/parse.py index a3fd308e1e..c43bb3775b 100644 --- a/src/calibre/ebooks/epub/cfi/parse.py +++ b/src/calibre/ebooks/epub/cfi/parse.py @@ -8,7 +8,6 @@ import regex class Parser: - ''' See epubcfi.ebnf for the specification that this parser tries to follow. I have implemented it manually, since I dont want to depend on grako, and the grammar is pretty simple. This parser is thread-safe, i.e. diff --git a/src/calibre/ebooks/epub/pages.py b/src/calibre/ebooks/epub/pages.py index 7f58aa728c..bfccca2090 100644 --- a/src/calibre/ebooks/epub/pages.py +++ b/src/calibre/ebooks/epub/pages.py @@ -48,7 +48,7 @@ def add_page_map(opfpath, opts): oeb = OEBBook(opfpath) selector = XPath(opts.page, namespaces=NSMAP) name_for = build_name_for(opts.page_names) - idgen = ("calibre-page-%d" % n for n in count(1)) + idgen = ('calibre-page-%d' % n for n in count(1)) for item in oeb.spine: data = item.data for elem in selector(data): diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 7b67ad15cd..54f8b6e019 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -24,7 +24,6 @@ from polyglot.urllib import urlparse, urlunparse class Link: - ''' Represents a link in a HTML file. ''' @@ -79,7 +78,6 @@ class IgnoreFile(Exception): class HTMLFile: - ''' Contains basic information about an HTML file. This includes a list of links to other files as well as @@ -91,7 +89,7 @@ class HTMLFile: HTML_PAT = re.compile(r'<\s*html', re.IGNORECASE) HTML_PAT_BIN = re.compile(br'<\s*html', re.IGNORECASE) - TITLE_PAT = re.compile('<title>([^<>]+)', re.IGNORECASE) + TITLE_PAT = re.compile(r'([^<>]+)', re.IGNORECASE) LINK_PAT = re.compile( r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P[^"]+)")|(?:\'(?P[^\']+)\')|(?P[^\s>]+))', re.DOTALL|re.IGNORECASE) @@ -256,7 +254,7 @@ def traverse(path_to_html_file, max_levels=sys.maxsize, verbose=0, encoding=None ''' assert max_levels >= 0 level = 0 - flat = [HTMLFile(path_to_html_file, level, encoding, verbose, correct_case_mismatches=correct_case_mismatches)] + flat = [HTMLFile(path_to_html_file, level, encoding, verbose, correct_case_mismatches=correct_case_mismatches)] seen = {flat[0].path} next_level = list(flat) while level < max_levels and len(next_level) > 0: diff --git a/src/calibre/ebooks/html_entities.py b/src/calibre/ebooks/html_entities.py index 6817ac03c8..397537923a 100644 --- a/src/calibre/ebooks/html_entities.py +++ b/src/calibre/ebooks/html_entities.py @@ -2194,8 +2194,8 @@ def find_tests(): t('a&b<c', 'a&b')[0] - self.assertTrue(t('add_attrs', "a=2")(p)) + self.assertTrue(t('add_attrs', 'a=2')(p)) self.ae(p.items(), [('a', '2')]) p = r('

ts')[0] diff --git a/src/calibre/ebooks/htmlz/oeb2html.py b/src/calibre/ebooks/htmlz/oeb2html.py index a950fce2c3..23710d0fb1 100644 --- a/src/calibre/ebooks/htmlz/oeb2html.py +++ b/src/calibre/ebooks/htmlz/oeb2html.py @@ -269,7 +269,7 @@ class OEB2HTMLInlineCSSizer(OEB2HTML): tag = 'div' # Add page-break-brefore: always because renders typically treat a new file (we're merging files) # as a page break and remove all other page break types that might be set. - style_a = 'page-break-before: always; %s' % re.sub('page-break-[^:]+:[^;]+;?', '', style_a) + style_a = 'page-break-before: always; %s' % re.sub(r'page-break-[^:]+:[^;]+;?', '', style_a) # Remove unnecessary spaces. style_a = re.sub(r'\s{2,}', ' ', style_a).strip() tags.append(tag) @@ -338,7 +338,7 @@ class OEB2HTMLClassCSSizer(OEB2HTML): if self.opts.htmlz_class_style == 'external': css = '' else: - css = '' + css = '' title = '%s' % prepare_string_for_xml(self.book_title) output = [''] + \ [css] + [title, ''] + output + [''] @@ -411,14 +411,14 @@ def oeb2html_no_css(oeb_book, log, opts): izer = OEB2HTMLNoCSSizer(log) html = izer.oeb2html(oeb_book, opts) images = izer.images - return (html, images) + return html, images def oeb2html_inline_css(oeb_book, log, opts): izer = OEB2HTMLInlineCSSizer(log) html = izer.oeb2html(oeb_book, opts) images = izer.images - return (html, images) + return html, images def oeb2html_class_css(oeb_book, log, opts): @@ -426,4 +426,4 @@ def oeb2html_class_css(oeb_book, log, opts): setattr(opts, 'class_style', 'inline') html = izer.oeb2html(oeb_book, opts) images = izer.images - return (html, images) + return html, images diff --git a/src/calibre/ebooks/hyphenate.py b/src/calibre/ebooks/hyphenate.py index 78480cdf57..6a19eaeb1a 100644 --- a/src/calibre/ebooks/hyphenate.py +++ b/src/calibre/ebooks/hyphenate.py @@ -1,4 +1,4 @@ -""" Hyphenation, using Frank Liang's algorithm. +''' Hyphenation, using Frank Liang's algorithm. This module provides a single function to hyphenate words. hyphenate_word takes a string (the word), and returns a list of parts that can be separated by hyphens. @@ -12,7 +12,7 @@ Ned Batchelder, July 2007. This Python code is in the public domain. -""" +''' import re @@ -29,13 +29,13 @@ class Hyphenator: self.exceptions = {} for ex in exceptions.split(): # Convert the hyphenated pattern into a point array for use later. - self.exceptions[ex.replace('-', '')] = [0] + [int(h == '-') for h in re.split(r"[a-z]", ex)] + self.exceptions[ex.replace('-', '')] = [0] + [int(h == '-') for h in re.split(r'[a-z]', ex)] def _insert_pattern(self, pattern): # Convert a pattern like 'a1bc3d4' into a string of chars 'abcd' # and a list of points [ 1, 0, 3, 4 ]. - chars = re.sub('[0-9]', '', pattern) - points = [int(d or 0) for d in re.split("[.a-z]", pattern)] + chars = re.sub(r'[0-9]', '', pattern) + points = [int(d or 0) for d in re.split(r'[.a-z]', pattern)] # Insert the pattern into the tree. Each character finds a dict # another level down in the tree, and leaf nodes have the list of @@ -48,9 +48,9 @@ class Hyphenator: t[None] = points def hyphenate_word(self, word): - """ Given a word, returns a list of pieces, broken at the possible + ''' Given a word, returns a list of pieces, broken at the possible hyphenation points. - """ + ''' # Short words aren't hyphenated. if len(word) <= 4: return [word] @@ -86,7 +86,7 @@ class Hyphenator: patterns = ( # Knuth and Liang's original hyphenation patterns from classic TeX. # In the public domain. 
-""" +''' .ach4 .ad4der .af1t .al3t .am5at .an5c .ang4 .ani5m .ant4 .an3te .anti5s .ar5s .ar4tie .ar4ty .as3c .as1p .as1s .aster5 .atom5 .au1d .av4i .awn4 .ba4g .ba5na .bas4e .ber4 .be5ra .be3sm .be5sto .bri2 .but4ti .cam4pe .can5c .capa5b .car5ol @@ -436,7 +436,7 @@ ympa3 yn3chr yn5d yn5g yn5ic 5ynx y1o4 yo5d y4o5g yom4 yo5net y4ons y4os y4ped yper5 yp3i y3po y4poc yp2ta y5pu yra5m yr5ia y3ro yr4r ys4c y3s2e ys3ica ys3io 3ysis y4so yss4 ys1t ys3ta ysur4 y3thin yt3ic y1w za1 z5a2b zar2 4zb 2ze ze4n ze4p z1er ze3ro zet4 2z1i z4il z4is 5zl 4zm 1zo zo4m zo5ol zte4 4z1z2 z4zy -""" +''' # Extra patterns, from ushyphmax.tex, dated 2005-05-30. # Copyright (C) 1990, 2004, 2005 Gerard D.C. Kuiken. # Copying and distribution of this file, with or without modification, @@ -446,7 +446,7 @@ ze4p z1er ze3ro zet4 2z1i z4il z4is 5zl 4zm 1zo zo4m zo5ol zte4 4z1z2 z4zy # These patterns are based on the Hyphenation Exception Log # published in TUGboat, Volume 10 (1989), No. 3, pp. 337-341, # and a large number of incorrectly hyphenated words not yet published. -""" +''' .con5gr .de5riva .dri5v4 .eth1y6l1 .eu4ler .ev2 .ever5si5b .ga4s1om1 .ge4ome .ge5ot1 .he3mo1 .he3p6a .he3roe .in5u2t .kil2n3i .ko6r1te1 .le6ices .me4ga1l .met4ala .mim5i2c1 .mi1s4ers .ne6o3f .noe1th .non1e2m .poly1s .post1am .pre1am @@ -502,13 +502,13 @@ uea1m u2r1al. uri4al. us2er. v1ativ v1oir5du1 va6guer vaude3v 1verely. v1er1eig ves1tite vi1vip3a3r voice1p waste3w6a2 wave1g4 w3c week1n wide5sp wo4k1en wrap3aro writ6er. x1q xquis3 y5che3d ym5e5try y1stro yes5ter1y z3ian. z3o1phr z2z3w -""") +''') -exceptions = """ +exceptions = ''' as-so-ciate as-so-ciates dec-li-na-tion oblig-a-tory phil-an-thropic present presents project projects reci-procity re-cog-ni-zance ref-or-ma-tion ret-ri-bu-tion ta-ble -""" +''' hyphenator = Hyphenator(patterns, exceptions) hyphenate_word = hyphenator.hyphenate_word diff --git a/src/calibre/ebooks/lit/maps/__init__.py b/src/calibre/ebooks/lit/maps/__init__.py index 13114268d9..9b381662e3 100644 --- a/src/calibre/ebooks/lit/maps/__init__.py +++ b/src/calibre/ebooks/lit/maps/__init__.py @@ -1,9 +1,9 @@ __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' -""" +''' Microsoft LIT tag and attribute tables. -""" +''' from calibre.ebooks.lit.maps.html import MAP as HTML_MAP from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP diff --git a/src/calibre/ebooks/lit/maps/html.py b/src/calibre/ebooks/lit/maps/html.py index c144d55ea8..92c249d25c 100644 --- a/src/calibre/ebooks/lit/maps/html.py +++ b/src/calibre/ebooks/lit/maps/html.py @@ -1,903 +1,903 @@ __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' -""" +''' Microsoft LIT HTML tag and attribute tables, copied from ConvertLIT. 
-""" +''' TAGS = [ None, None, None, - "a", - "acronym", - "address", - "applet", - "area", - "b", - "base", - "basefont", - "bdo", - "bgsound", - "big", - "blink", - "blockquote", - "body", - "br", - "button", - "caption", - "center", - "cite", - "code", - "col", - "colgroup", + 'a', + 'acronym', + 'address', + 'applet', + 'area', + 'b', + 'base', + 'basefont', + 'bdo', + 'bgsound', + 'big', + 'blink', + 'blockquote', + 'body', + 'br', + 'button', + 'caption', + 'center', + 'cite', + 'code', + 'col', + 'colgroup', None, None, - "dd", - "del", - "dfn", - "dir", - "div", - "dl", - "dt", - "em", - "embed", - "fieldset", - "font", - "form", - "frame", - "frameset", + 'dd', + 'del', + 'dfn', + 'dir', + 'div', + 'dl', + 'dt', + 'em', + 'embed', + 'fieldset', + 'font', + 'form', + 'frame', + 'frameset', None, - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", - "head", - "hr", - "html", - "i", - "iframe", - "img", - "input", - "ins", - "kbd", - "label", - "legend", - "li", - "link", - "tag61", - "map", - "tag63", - "tag64", - "meta", - "nextid", - "nobr", - "noembed", - "noframes", - "noscript", - "object", - "ol", - "option", - "p", - "param", - "plaintext", - "pre", - "q", - "rp", - "rt", - "ruby", - "s", - "samp", - "script", - "select", - "small", - "span", - "strike", - "strong", - "style", - "sub", - "sup", - "table", - "tbody", - "tc", - "td", - "textarea", - "tfoot", - "th", - "thead", - "title", - "tr", - "tt", - "u", - "ul", - "var", - "wbr", + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'head', + 'hr', + 'html', + 'i', + 'iframe', + 'img', + 'input', + 'ins', + 'kbd', + 'label', + 'legend', + 'li', + 'link', + 'tag61', + 'map', + 'tag63', + 'tag64', + 'meta', + 'nextid', + 'nobr', + 'noembed', + 'noframes', + 'noscript', + 'object', + 'ol', + 'option', + 'p', + 'param', + 'plaintext', + 'pre', + 'q', + 'rp', + 'rt', + 'ruby', + 's', + 'samp', + 'script', + 'select', + 'small', + 'span', + 'strike', + 'strong', + 'style', + 'sub', + 'sup', + 'table', + 'tbody', + 'tc', + 'td', + 'textarea', + 'tfoot', + 'th', + 'thead', + 'title', + 'tr', + 'tt', + 'u', + 'ul', + 'var', + 'wbr', None, ] ATTRS0 = { - 0x8010: "tabindex", - 0x8046: "title", - 0x804b: "style", - 0x804d: "disabled", - 0x83ea: "class", - 0x83eb: "id", - 0x83fe: "datafld", - 0x83ff: "datasrc", - 0x8400: "dataformatas", - 0x87d6: "accesskey", - 0x9392: "lang", - 0x93ed: "language", - 0x93fe: "dir", - 0x9771: "onmouseover", - 0x9772: "onmouseout", - 0x9773: "onmousedown", - 0x9774: "onmouseup", - 0x9775: "onmousemove", - 0x9776: "onkeydown", - 0x9777: "onkeyup", - 0x9778: "onkeypress", - 0x9779: "onclick", - 0x977a: "ondblclick", - 0x977e: "onhelp", - 0x977f: "onfocus", - 0x9780: "onblur", - 0x9783: "onrowexit", - 0x9784: "onrowenter", - 0x9786: "onbeforeupdate", - 0x9787: "onafterupdate", - 0x978a: "onreadystatechange", - 0x9790: "onscroll", - 0x9794: "ondragstart", - 0x9795: "onresize", - 0x9796: "onselectstart", - 0x9797: "onerrorupdate", - 0x9799: "ondatasetchanged", - 0x979a: "ondataavailable", - 0x979b: "ondatasetcomplete", - 0x979c: "onfilterchange", - 0x979f: "onlosecapture", - 0x97a0: "onpropertychange", - 0x97a2: "ondrag", - 0x97a3: "ondragend", - 0x97a4: "ondragenter", - 0x97a5: "ondragover", - 0x97a6: "ondragleave", - 0x97a7: "ondrop", - 0x97a8: "oncut", - 0x97a9: "oncopy", - 0x97aa: "onpaste", - 0x97ab: "onbeforecut", - 0x97ac: "onbeforecopy", - 0x97ad: "onbeforepaste", - 0x97af: "onrowsdelete", - 0x97b0: "onrowsinserted", - 0x97b1: "oncellchange", - 0x97b2: "oncontextmenu", - 0x97b6: "onbeforeeditfocus", + 
0x8010: 'tabindex', + 0x8046: 'title', + 0x804b: 'style', + 0x804d: 'disabled', + 0x83ea: 'class', + 0x83eb: 'id', + 0x83fe: 'datafld', + 0x83ff: 'datasrc', + 0x8400: 'dataformatas', + 0x87d6: 'accesskey', + 0x9392: 'lang', + 0x93ed: 'language', + 0x93fe: 'dir', + 0x9771: 'onmouseover', + 0x9772: 'onmouseout', + 0x9773: 'onmousedown', + 0x9774: 'onmouseup', + 0x9775: 'onmousemove', + 0x9776: 'onkeydown', + 0x9777: 'onkeyup', + 0x9778: 'onkeypress', + 0x9779: 'onclick', + 0x977a: 'ondblclick', + 0x977e: 'onhelp', + 0x977f: 'onfocus', + 0x9780: 'onblur', + 0x9783: 'onrowexit', + 0x9784: 'onrowenter', + 0x9786: 'onbeforeupdate', + 0x9787: 'onafterupdate', + 0x978a: 'onreadystatechange', + 0x9790: 'onscroll', + 0x9794: 'ondragstart', + 0x9795: 'onresize', + 0x9796: 'onselectstart', + 0x9797: 'onerrorupdate', + 0x9799: 'ondatasetchanged', + 0x979a: 'ondataavailable', + 0x979b: 'ondatasetcomplete', + 0x979c: 'onfilterchange', + 0x979f: 'onlosecapture', + 0x97a0: 'onpropertychange', + 0x97a2: 'ondrag', + 0x97a3: 'ondragend', + 0x97a4: 'ondragenter', + 0x97a5: 'ondragover', + 0x97a6: 'ondragleave', + 0x97a7: 'ondrop', + 0x97a8: 'oncut', + 0x97a9: 'oncopy', + 0x97aa: 'onpaste', + 0x97ab: 'onbeforecut', + 0x97ac: 'onbeforecopy', + 0x97ad: 'onbeforepaste', + 0x97af: 'onrowsdelete', + 0x97b0: 'onrowsinserted', + 0x97b1: 'oncellchange', + 0x97b2: 'oncontextmenu', + 0x97b6: 'onbeforeeditfocus', } ATTRS3 = { - 0x0001: "href", - 0x03ec: "target", - 0x03ee: "rel", - 0x03ef: "rev", - 0x03f0: "urn", - 0x03f1: "methods", - 0x8001: "name", - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x0001: 'href', + 0x03ec: 'target', + 0x03ee: 'rel', + 0x03ef: 'rev', + 0x03f0: 'urn', + 0x03f1: 'methods', + 0x8001: 'name', + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS5 = { - 0x9399: "clear", + 0x9399: 'clear', } ATTRS6 = { - 0x8001: "name", - 0x8006: "width", - 0x8007: "height", - 0x804a: "align", - 0x8bbb: "classid", - 0x8bbc: "data", - 0x8bbf: "codebase", - 0x8bc0: "codetype", - 0x8bc1: "code", - 0x8bc2: "type", - 0x8bc5: "vspace", - 0x8bc6: "hspace", - 0x978e: "onerror", + 0x8001: 'name', + 0x8006: 'width', + 0x8007: 'height', + 0x804a: 'align', + 0x8bbb: 'classid', + 0x8bbc: 'data', + 0x8bbf: 'codebase', + 0x8bc0: 'codetype', + 0x8bc1: 'code', + 0x8bc2: 'type', + 0x8bc5: 'vspace', + 0x8bc6: 'hspace', + 0x978e: 'onerror', } ATTRS7 = { - 0x0001: "href", - 0x03ea: "shape", - 0x03eb: "coords", - 0x03ed: "target", - 0x03ee: "alt", - 0x03ef: "nohref", - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x0001: 'href', + 0x03ea: 'shape', + 0x03eb: 'coords', + 0x03ed: 'target', + 0x03ee: 'alt', + 0x03ef: 'nohref', + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS8 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS9 = { - 0x03ec: "href", - 0x03ed: "target", + 0x03ec: 'href', + 0x03ed: 'target', } ATTRS10 = { - 0x938b: "color", - 0x939b: "face", - 0x93a3: "size", + 0x938b: 'color', + 0x939b: 'face', + 0x93a3: 'size', } ATTRS12 = { - 0x03ea: "src", - 0x03eb: "loop", - 0x03ec: "volume", - 0x03ed: "balance", + 0x03ea: 'src', + 0x03eb: 'loop', + 0x03ec: 'volume', + 0x03ed: 'balance', } ATTRS13 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS15 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 
0x83eb: "id", - 0x9399: "clear", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x9399: 'clear', } ATTRS16 = { - 0x07db: "link", - 0x07dc: "alink", - 0x07dd: "vlink", - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x938a: "background", - 0x938b: "text", - 0x938e: "nowrap", - 0x93ae: "topmargin", - 0x93af: "rightmargin", - 0x93b0: "bottommargin", - 0x93b1: "leftmargin", - 0x93b6: "bgproperties", - 0x93d8: "scroll", - 0x977b: "onselect", - 0x9791: "onload", - 0x9792: "onunload", - 0x9798: "onbeforeunload", - 0x97b3: "onbeforeprint", - 0x97b4: "onafterprint", - 0xfe0c: "bgcolor", + 0x07db: 'link', + 0x07dc: 'alink', + 0x07dd: 'vlink', + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x938a: 'background', + 0x938b: 'text', + 0x938e: 'nowrap', + 0x93ae: 'topmargin', + 0x93af: 'rightmargin', + 0x93b0: 'bottommargin', + 0x93b1: 'leftmargin', + 0x93b6: 'bgproperties', + 0x93d8: 'scroll', + 0x977b: 'onselect', + 0x9791: 'onload', + 0x9792: 'onunload', + 0x9798: 'onbeforeunload', + 0x97b3: 'onbeforeprint', + 0x97b4: 'onafterprint', + 0xfe0c: 'bgcolor', } ATTRS17 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x9399: "clear", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x9399: 'clear', } ATTRS18 = { - 0x07d1: "type", - 0x8001: "name", + 0x07d1: 'type', + 0x8001: 'name', } ATTRS19 = { - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x93a8: "valign", + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x93a8: 'valign', } ATTRS20 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x9399: "clear", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x9399: 'clear', } ATTRS21 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS22 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS23 = { - 0x03ea: "span", - 0x8006: "width", - 0x8049: "align", - 0x93a8: "valign", - 0xfe0c: "bgcolor", + 0x03ea: 'span', + 0x8006: 'width', + 0x8049: 'align', + 0x93a8: 'valign', + 0xfe0c: 'bgcolor', } ATTRS24 = { - 0x03ea: "span", - 0x8006: "width", - 0x8049: "align", - 0x93a8: "valign", - 0xfe0c: "bgcolor", + 0x03ea: 'span', + 0x8006: 'width', + 0x8049: 'align', + 0x93a8: 'valign', + 0xfe0c: 'bgcolor', } ATTRS27 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x938e: "nowrap", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x938e: 'nowrap', } ATTRS29 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS31 = { - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x938e: "nowrap", + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x938e: 'nowrap', } ATTRS32 = { - 0x03ea: "compact", - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x03ea: 'compact', + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS33 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x938e: "nowrap", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 
'class', + 0x83eb: 'id', + 0x938e: 'nowrap', } ATTRS34 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS35 = { - 0x8001: "name", - 0x8006: "width", - 0x8007: "height", - 0x804a: "align", - 0x8bbd: "palette", - 0x8bbe: "pluginspage", + 0x8001: 'name', + 0x8006: 'width', + 0x8007: 'height', + 0x804a: 'align', + 0x8bbd: 'palette', + 0x8bbe: 'pluginspage', # 0x8bbf: "codebase", - 0x8bbf: "src", - 0x8bc1: "units", - 0x8bc2: "type", - 0x8bc3: "hidden", + 0x8bbf: 'src', + 0x8bc1: 'units', + 0x8bc2: 'type', + 0x8bc3: 'hidden', } ATTRS36 = { - 0x804a: "align", + 0x804a: 'align', } ATTRS37 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x938b: "color", - 0x939b: "face", - 0x939c: "size", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x938b: 'color', + 0x939b: 'face', + 0x939c: 'size', } ATTRS38 = { - 0x03ea: "action", - 0x03ec: "enctype", - 0x03ed: "method", - 0x03ef: "target", - 0x03f4: "accept-charset", - 0x8001: "name", - 0x977c: "onsubmit", - 0x977d: "onreset", + 0x03ea: 'action', + 0x03ec: 'enctype', + 0x03ed: 'method', + 0x03ef: 'target', + 0x03f4: 'accept-charset', + 0x8001: 'name', + 0x977c: 'onsubmit', + 0x977d: 'onreset', } ATTRS39 = { - 0x8000: "align", - 0x8001: "name", - 0x8bb9: "src", - 0x8bbb: "border", - 0x8bbc: "frameborder", - 0x8bbd: "framespacing", - 0x8bbe: "marginwidth", - 0x8bbf: "marginheight", - 0x8bc0: "noresize", - 0x8bc1: "scrolling", - 0x8fa2: "bordercolor", + 0x8000: 'align', + 0x8001: 'name', + 0x8bb9: 'src', + 0x8bbb: 'border', + 0x8bbc: 'frameborder', + 0x8bbd: 'framespacing', + 0x8bbe: 'marginwidth', + 0x8bbf: 'marginheight', + 0x8bc0: 'noresize', + 0x8bc1: 'scrolling', + 0x8fa2: 'bordercolor', } ATTRS40 = { - 0x03e9: "rows", - 0x03ea: "cols", - 0x03eb: "border", - 0x03ec: "bordercolor", - 0x03ed: "frameborder", - 0x03ee: "framespacing", - 0x8001: "name", - 0x9791: "onload", - 0x9792: "onunload", - 0x9798: "onbeforeunload", - 0x97b3: "onbeforeprint", - 0x97b4: "onafterprint", + 0x03e9: 'rows', + 0x03ea: 'cols', + 0x03eb: 'border', + 0x03ec: 'bordercolor', + 0x03ed: 'frameborder', + 0x03ee: 'framespacing', + 0x8001: 'name', + 0x9791: 'onload', + 0x9792: 'onunload', + 0x9798: 'onbeforeunload', + 0x97b3: 'onbeforeprint', + 0x97b4: 'onafterprint', } ATTRS42 = { - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x9399: "clear", + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x9399: 'clear', } ATTRS43 = { - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x9399: "clear", + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x9399: 'clear', } ATTRS44 = { - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x9399: "clear", + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x9399: 'clear', } ATTRS45 = { - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x9399: "clear", + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x9399: 'clear', } ATTRS46 = { - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x9399: "clear", + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x9399: 'clear', 
} ATTRS47 = { - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x9399: "clear", + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x9399: 'clear', } ATTRS49 = { - 0x03ea: "noshade", - 0x8006: "width", - 0x8007: "size", - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x938b: "color", + 0x03ea: 'noshade', + 0x8006: 'width', + 0x8007: 'size', + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x938b: 'color', } ATTRS51 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS52 = { - 0x8001: "name", - 0x8006: "width", - 0x8007: "height", - 0x804a: "align", - 0x8bb9: "src", - 0x8bbb: "border", - 0x8bbc: "frameborder", - 0x8bbd: "framespacing", - 0x8bbe: "marginwidth", - 0x8bbf: "marginheight", - 0x8bc0: "noresize", - 0x8bc1: "scrolling", - 0x8fa2: "vspace", - 0x8fa3: "hspace", + 0x8001: 'name', + 0x8006: 'width', + 0x8007: 'height', + 0x804a: 'align', + 0x8bb9: 'src', + 0x8bbb: 'border', + 0x8bbc: 'frameborder', + 0x8bbd: 'framespacing', + 0x8bbe: 'marginwidth', + 0x8bbf: 'marginheight', + 0x8bc0: 'noresize', + 0x8bc1: 'scrolling', + 0x8fa2: 'vspace', + 0x8fa3: 'hspace', } ATTRS53 = { - 0x03eb: "alt", - 0x03ec: "src", - 0x03ed: "border", - 0x03ee: "vspace", - 0x03ef: "hspace", - 0x03f0: "lowsrc", - 0x03f1: "vrml", - 0x03f2: "dynsrc", - 0x03f4: "loop", - 0x03f6: "start", - 0x07d3: "ismap", - 0x07d9: "usemap", - 0x8001: "name", - 0x8006: "width", - 0x8007: "height", - 0x8046: "title", - 0x804a: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x978d: "onabort", - 0x978e: "onerror", - 0x9791: "onload", + 0x03eb: 'alt', + 0x03ec: 'src', + 0x03ed: 'border', + 0x03ee: 'vspace', + 0x03ef: 'hspace', + 0x03f0: 'lowsrc', + 0x03f1: 'vrml', + 0x03f2: 'dynsrc', + 0x03f4: 'loop', + 0x03f6: 'start', + 0x07d3: 'ismap', + 0x07d9: 'usemap', + 0x8001: 'name', + 0x8006: 'width', + 0x8007: 'height', + 0x8046: 'title', + 0x804a: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x978d: 'onabort', + 0x978e: 'onerror', + 0x9791: 'onload', } ATTRS54 = { - 0x07d1: "type", - 0x07d3: "size", - 0x07d4: "maxlength", - 0x07d6: "readonly", - 0x07d8: "indeterminate", - 0x07da: "checked", - 0x07db: "alt", - 0x07dc: "src", - 0x07dd: "border", - 0x07de: "vspace", - 0x07df: "hspace", - 0x07e0: "lowsrc", - 0x07e1: "vrml", - 0x07e2: "dynsrc", - 0x07e4: "loop", - 0x07e5: "start", - 0x8001: "name", - 0x8006: "width", - 0x8007: "height", - 0x804a: "align", - 0x93ee: "value", - 0x977b: "onselect", - 0x978d: "onabort", - 0x978e: "onerror", - 0x978f: "onchange", - 0x9791: "onload", + 0x07d1: 'type', + 0x07d3: 'size', + 0x07d4: 'maxlength', + 0x07d6: 'readonly', + 0x07d8: 'indeterminate', + 0x07da: 'checked', + 0x07db: 'alt', + 0x07dc: 'src', + 0x07dd: 'border', + 0x07de: 'vspace', + 0x07df: 'hspace', + 0x07e0: 'lowsrc', + 0x07e1: 'vrml', + 0x07e2: 'dynsrc', + 0x07e4: 'loop', + 0x07e5: 'start', + 0x8001: 'name', + 0x8006: 'width', + 0x8007: 'height', + 0x804a: 'align', + 0x93ee: 'value', + 0x977b: 'onselect', + 0x978d: 'onabort', + 0x978e: 'onerror', + 0x978f: 'onchange', + 0x9791: 'onload', } ATTRS56 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS57 = { - 0x03e9: "for", + 0x03e9: 'for', } ATTRS58 = { - 0x804a: "align", + 
0x804a: 'align', } ATTRS59 = { - 0x03ea: "value", - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x939a: "type", + 0x03ea: 'value', + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x939a: 'type', } ATTRS60 = { - 0x03ee: "href", - 0x03ef: "rel", - 0x03f0: "rev", - 0x03f1: "type", - 0x03f9: "media", - 0x03fa: "target", - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x978e: "onerror", - 0x9791: "onload", + 0x03ee: 'href', + 0x03ef: 'rel', + 0x03f0: 'rev', + 0x03f1: 'type', + 0x03f9: 'media', + 0x03fa: 'target', + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x978e: 'onerror', + 0x9791: 'onload', } ATTRS61 = { - 0x9399: "clear", + 0x9399: 'clear', } ATTRS62 = { - 0x8001: "name", - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8001: 'name', + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS63 = { - 0x1771: "scrolldelay", - 0x1772: "direction", - 0x1773: "behavior", - 0x1774: "scrollamount", - 0x1775: "loop", - 0x1776: "vspace", - 0x1777: "hspace", - 0x1778: "truespeed", - 0x8006: "width", - 0x8007: "height", - 0x9785: "onbounce", - 0x978b: "onfinish", - 0x978c: "onstart", - 0xfe0c: "bgcolor", + 0x1771: 'scrolldelay', + 0x1772: 'direction', + 0x1773: 'behavior', + 0x1774: 'scrollamount', + 0x1775: 'loop', + 0x1776: 'vspace', + 0x1777: 'hspace', + 0x1778: 'truespeed', + 0x8006: 'width', + 0x8007: 'height', + 0x9785: 'onbounce', + 0x978b: 'onfinish', + 0x978c: 'onstart', + 0xfe0c: 'bgcolor', } ATTRS65 = { - 0x03ea: "http-equiv", - 0x03eb: "content", - 0x03ec: "url", - 0x03f6: "charset", - 0x8001: "name", + 0x03ea: 'http-equiv', + 0x03eb: 'content', + 0x03ec: 'url', + 0x03f6: 'charset', + 0x8001: 'name', } ATTRS66 = { - 0x03f5: "n", + 0x03f5: 'n', } ATTRS71 = { # 0x8000: "border", - 0x8000: "usemap", - 0x8001: "name", - 0x8006: "width", - 0x8007: "height", - 0x8046: "title", - 0x804a: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x8bbb: "classid", - 0x8bbc: "data", - 0x8bbf: "codebase", - 0x8bc0: "codetype", - 0x8bc1: "code", - 0x8bc2: "type", - 0x8bc5: "vspace", - 0x8bc6: "hspace", - 0x978e: "onerror", + 0x8000: 'usemap', + 0x8001: 'name', + 0x8006: 'width', + 0x8007: 'height', + 0x8046: 'title', + 0x804a: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x8bbb: 'classid', + 0x8bbc: 'data', + 0x8bbf: 'codebase', + 0x8bc0: 'codetype', + 0x8bc1: 'code', + 0x8bc2: 'type', + 0x8bc5: 'vspace', + 0x8bc6: 'hspace', + 0x978e: 'onerror', } ATTRS72 = { - 0x03eb: "compact", - 0x03ec: "start", - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x939a: "type", + 0x03eb: 'compact', + 0x03ec: 'start', + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x939a: 'type', } ATTRS73 = { - 0x03ea: "selected", - 0x03eb: "value", + 0x03ea: 'selected', + 0x03eb: 'value', } ATTRS74 = { - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x9399: "clear", + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x9399: 'clear', } ATTRS75 = { # 0x8000: "name", # 0x8000: "value", - 0x8000: "type", + 0x8000: 'type', } ATTRS76 = { - 0x9399: "clear", + 0x9399: 'clear', } ATTRS77 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x9399: "clear", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x9399: 'clear', } ATTRS78 = { - 0x8046: "title", - 
0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS82 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS83 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS84 = { - 0x03ea: "src", - 0x03ed: "for", - 0x03ee: "event", - 0x03f0: "defer", - 0x03f2: "type", - 0x978e: "onerror", + 0x03ea: 'src', + 0x03ed: 'for', + 0x03ee: 'event', + 0x03f0: 'defer', + 0x03f2: 'type', + 0x978e: 'onerror', } ATTRS85 = { - 0x03eb: "size", - 0x03ec: "multiple", - 0x8000: "align", - 0x8001: "name", - 0x978f: "onchange", + 0x03eb: 'size', + 0x03ec: 'multiple', + 0x8000: 'align', + 0x8001: 'name', + 0x978f: 'onchange', } ATTRS86 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS87 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS88 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS89 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS90 = { - 0x03eb: "type", - 0x03ef: "media", - 0x8046: "title", - 0x978e: "onerror", - 0x9791: "onload", + 0x03eb: 'type', + 0x03ef: 'media', + 0x8046: 'title', + 0x978e: 'onerror', + 0x9791: 'onload', } ATTRS91 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS92 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS93 = { - 0x03ea: "cols", - 0x03eb: "border", - 0x03ec: "rules", - 0x03ed: "frame", - 0x03ee: "cellspacing", - 0x03ef: "cellpadding", - 0x03fa: "datapagesize", - 0x8006: "width", - 0x8007: "height", - 0x8046: "title", - 0x804a: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x938a: "background", - 0x93a5: "bordercolor", - 0x93a6: "bordercolorlight", - 0x93a7: "bordercolordark", - 0xfe0c: "bgcolor", + 0x03ea: 'cols', + 0x03eb: 'border', + 0x03ec: 'rules', + 0x03ed: 'frame', + 0x03ee: 'cellspacing', + 0x03ef: 'cellpadding', + 0x03fa: 'datapagesize', + 0x8006: 'width', + 0x8007: 'height', + 0x8046: 'title', + 0x804a: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x938a: 'background', + 0x93a5: 'bordercolor', + 0x93a6: 'bordercolorlight', + 0x93a7: 'bordercolordark', + 0xfe0c: 'bgcolor', } ATTRS94 = { - 0x8049: "align", - 0x93a8: "valign", - 0xfe0c: "bgcolor", + 0x8049: 'align', + 0x93a8: 'valign', + 0xfe0c: 'bgcolor', } ATTRS95 = { - 0x8049: "align", - 0x93a8: "valign", + 0x8049: 'align', + 0x93a8: 'valign', } ATTRS96 = { - 0x07d2: "rowspan", - 0x07d3: "colspan", - 0x8006: "width", - 0x8007: "height", - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x938a: "background", - 0x938e: "nowrap", - 0x93a5: "bordercolor", - 0x93a6: "bordercolorlight", - 0x93a7: "bordercolordark", - 0x93a8: "valign", - 0xfe0c: "bgcolor", + 0x07d2: 'rowspan', + 0x07d3: 'colspan', + 0x8006: 'width', + 0x8007: 
'height', + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x938a: 'background', + 0x938e: 'nowrap', + 0x93a5: 'bordercolor', + 0x93a6: 'bordercolorlight', + 0x93a7: 'bordercolordark', + 0x93a8: 'valign', + 0xfe0c: 'bgcolor', } ATTRS97 = { - 0x1b5a: "rows", - 0x1b5b: "cols", - 0x1b5c: "wrap", - 0x1b5d: "readonly", - 0x8001: "name", - 0x977b: "onselect", - 0x978f: "onchange", + 0x1b5a: 'rows', + 0x1b5b: 'cols', + 0x1b5c: 'wrap', + 0x1b5d: 'readonly', + 0x8001: 'name', + 0x977b: 'onselect', + 0x978f: 'onchange', } ATTRS98 = { - 0x8049: "align", - 0x93a8: "valign", - 0xfe0c: "bgcolor", + 0x8049: 'align', + 0x93a8: 'valign', + 0xfe0c: 'bgcolor', } ATTRS99 = { - 0x07d2: "rowspan", - 0x07d3: "colspan", - 0x8006: "width", - 0x8007: "height", - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x938a: "background", - 0x938e: "nowrap", - 0x93a5: "bordercolor", - 0x93a6: "bordercolorlight", - 0x93a7: "bordercolordark", - 0x93a8: "valign", - 0xfe0c: "bgcolor", + 0x07d2: 'rowspan', + 0x07d3: 'colspan', + 0x8006: 'width', + 0x8007: 'height', + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x938a: 'background', + 0x938e: 'nowrap', + 0x93a5: 'bordercolor', + 0x93a6: 'bordercolorlight', + 0x93a7: 'bordercolordark', + 0x93a8: 'valign', + 0xfe0c: 'bgcolor', } ATTRS100 = { - 0x8049: "align", - 0x93a8: "valign", - 0xfe0c: "bgcolor", + 0x8049: 'align', + 0x93a8: 'valign', + 0xfe0c: 'bgcolor', } ATTRS102 = { - 0x8007: "height", - 0x8046: "title", - 0x8049: "align", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x93a5: "bordercolor", - 0x93a6: "bordercolorlight", - 0x93a7: "bordercolordark", - 0x93a8: "valign", - 0xfe0c: "bgcolor", + 0x8007: 'height', + 0x8046: 'title', + 0x8049: 'align', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x93a5: 'bordercolor', + 0x93a6: 'bordercolorlight', + 0x93a7: 'bordercolordark', + 0x93a8: 'valign', + 0xfe0c: 'bgcolor', } ATTRS103 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS104 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS105 = { - 0x03eb: "compact", - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", - 0x939a: "type", + 0x03eb: 'compact', + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', + 0x939a: 'type', } ATTRS106 = { - 0x8046: "title", - 0x804b: "style", - 0x83ea: "class", - 0x83eb: "id", + 0x8046: 'title', + 0x804b: 'style', + 0x83ea: 'class', + 0x83eb: 'id', } ATTRS108 = { - 0x9399: "clear", + 0x9399: 'clear', } TAGS_ATTRS = [ diff --git a/src/calibre/ebooks/lit/maps/opf.py b/src/calibre/ebooks/lit/maps/opf.py index af1355612b..8247ebb326 100644 --- a/src/calibre/ebooks/lit/maps/opf.py +++ b/src/calibre/ebooks/lit/maps/opf.py @@ -1,16 +1,16 @@ __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' -""" +''' Microsoft LIT OPF tag and attribute tables, copied from ConvertLIT. 
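The long TAGS/ATTRS tables above are pure data, so a reminder of how they are consumed may be useful: LIT streams encode tag and attribute names as small integer codes that index into these tables. A lookup sketch, with the import path assumed:

    from calibre.ebooks.lit.maps.html import ATTRS0, TAGS

    assert TAGS[3] == 'a'               # code 3 decodes to the <a> tag
    assert TAGS[0] is None              # low codes are reserved/unused
    assert ATTRS0[0x83ea] == 'class'    # global attribute table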
-""" +''' TAGS = [ None, - "package", - "dc:Title", - "dc:Creator", + 'package', + 'dc:Title', + 'dc:Creator', None, None, None, @@ -23,58 +23,58 @@ TAGS = [ None, None, None, - "manifest", - "item", - "spine", - "itemref", - "metadata", - "dc-metadata", - "dc:Subject", - "dc:Description", - "dc:Publisher", - "dc:Contributor", - "dc:Date", - "dc:Type", - "dc:Format", - "dc:Identifier", - "dc:Source", - "dc:Language", - "dc:Relation", - "dc:Coverage", - "dc:Rights", - "x-metadata", - "meta", - "tours", - "tour", - "site", - "guide", - "reference", + 'manifest', + 'item', + 'spine', + 'itemref', + 'metadata', + 'dc-metadata', + 'dc:Subject', + 'dc:Description', + 'dc:Publisher', + 'dc:Contributor', + 'dc:Date', + 'dc:Type', + 'dc:Format', + 'dc:Identifier', + 'dc:Source', + 'dc:Language', + 'dc:Relation', + 'dc:Coverage', + 'dc:Rights', + 'x-metadata', + 'meta', + 'tours', + 'tour', + 'site', + 'guide', + 'reference', None, ] ATTRS = { - 0x0001: "href", - 0x0002: "%never-used", - 0x0003: "%guid", - 0x0004: "%minimum_level", - 0x0005: "%attr5", - 0x0006: "id", - 0x0007: "href", - 0x0008: "media-type", - 0x0009: "fallback", - 0x000A: "idref", - 0x000B: "xmlns:dc", - 0x000C: "xmlns:oebpackage", - 0x000D: "role", - 0x000E: "file-as", - 0x000F: "event", - 0x0010: "scheme", - 0x0011: "title", - 0x0012: "type", - 0x0013: "unique-identifier", - 0x0014: "name", - 0x0015: "content", - 0x0016: "xml:lang", + 0x0001: 'href', + 0x0002: '%never-used', + 0x0003: '%guid', + 0x0004: '%minimum_level', + 0x0005: '%attr5', + 0x0006: 'id', + 0x0007: 'href', + 0x0008: 'media-type', + 0x0009: 'fallback', + 0x000A: 'idref', + 0x000B: 'xmlns:dc', + 0x000C: 'xmlns:oebpackage', + 0x000D: 'role', + 0x000E: 'file-as', + 0x000F: 'event', + 0x0010: 'scheme', + 0x0011: 'title', + 0x0012: 'type', + 0x0013: 'unique-identifier', + 0x0014: 'name', + 0x0015: 'content', + 0x0016: 'xml:lang', } TAGS_ATTRS = [{} for i in range(43)] diff --git a/src/calibre/ebooks/lit/mssha1.py b/src/calibre/ebooks/lit/mssha1.py index ccd735cd94..2a5eb6a721 100644 --- a/src/calibre/ebooks/lit/mssha1.py +++ b/src/calibre/ebooks/lit/mssha1.py @@ -1,8 +1,8 @@ -""" +''' Modified version of SHA-1 used in Microsoft LIT files. Adapted from the PyPy pure-Python SHA-1 implementation. -""" +''' __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' @@ -21,12 +21,12 @@ from polyglot.builtins import long_type def _long2bytesBigEndian(n, blocksize=0): - """Convert a long integer to a byte string. + '''Convert a long integer to a byte string. If optional blocksize is given and greater than zero, pad the front of the byte string with binary zeros so that the length is a multiple of blocksize. - """ + ''' # After much testing, this algorithm was deemed to be the fastest. s = b'' @@ -47,7 +47,7 @@ def _long2bytesBigEndian(n, blocksize=0): def _bytelist2longBigEndian(blist): - "Transform a list of characters into a list of longs." + 'Transform a list of characters into a list of longs.' imax = len(blist)//4 hl = [0] * imax @@ -67,7 +67,7 @@ def _bytelist2longBigEndian(blist): def _rotateLeft(x, n): - "Rotate x (32 bit) left n bits circular." + 'Rotate x (32 bit) left n bits circular.' return (x << n) | (x >> (32-n)) @@ -123,10 +123,10 @@ K = [ class mssha1: - "An implementation of the MD5 hash function in pure Python." + 'An implementation of the MD5 hash function in pure Python.' def __init__(self): - "Initialisation." + 'Initialisation.' # Initial message length in bits(!). 
self.length = 0 @@ -140,7 +140,7 @@ class mssha1: self.init() def init(self): - "Initialize the message-digest and set all fields to zero." + 'Initialize the message-digest and set all fields to zero.' self.length = 0 self.input = [] @@ -164,7 +164,7 @@ class mssha1: D = self.H3 E = self.H4 - for t in range(0, 80): + for t in range(80): TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t//20] E = D D = C @@ -182,7 +182,7 @@ class mssha1: # API of the sha module. def update(self, inBuf): - """Add to the current message. + '''Add to the current message. Update the mssha1 object with the string arg. Repeated calls are equivalent to a single call with the concatenation of all @@ -195,7 +195,7 @@ class mssha1: keep an intermediate value for the hash, so that we only need to make minimal recalculation if we call update() to add more data to the hashed string. - """ + ''' inBuf = bytearray(inBuf) leninBuf = long_type(len(inBuf)) @@ -225,12 +225,12 @@ class mssha1: self.input = self.input + inBuf def digest(self): - """Terminate the message-digest computation and return digest. + '''Terminate the message-digest computation and return digest. Return the digest of the strings passed to the update() method so far. This is a 16-byte string which may contain non-ASCII characters, including null bytes. - """ + ''' H0 = self.H0 H1 = self.H1 @@ -273,22 +273,22 @@ class mssha1: return digest def hexdigest(self): - """Terminate and return digest in HEX form. + '''Terminate and return digest in HEX form. Like digest() except the digest is returned as a string of length 32, containing only hexadecimal digits. This may be used to exchange the value safely in email or other non- binary environments. - """ + ''' return ''.join(['%02x' % c for c in bytearray(self.digest())]) def copy(self): - """Return a clone object. + '''Return a clone object. Return a copy ('clone') of the md5 object. This can be used to efficiently compute the digests of strings that share a common initial substring. - """ + ''' return copy.deepcopy(self) @@ -306,10 +306,10 @@ blocksize = 1 def new(arg=None): - """Return a new mssha1 crypto object. + '''Return a new mssha1 crypto object. If arg is present, the method call update(arg) is made. - """ + ''' crypto = mssha1() if arg: @@ -323,7 +323,7 @@ if __name__ == '__main__': import sys file = None if len(sys.argv) > 2: - print("usage: %s [FILE]" % sys.argv[0]) + print('usage: %s [FILE]' % sys.argv[0]) return elif len(sys.argv) < 2: file = sys.stdin diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index a0899a7491..ea9f86588f 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -3,8 +3,8 @@ Support for reading LIT files. ''' __license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' \ - 'and Marshall T. Vandegrift ' +__copyright__ = ('2008, Kovid Goyal ' + 'and Marshall T. 
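A note on the mssha1 docstrings re-quoted above: they still describe the class as an MD5 implementation with a 16-byte digest and a 32-character hexdigest, wording inherited from the old md5 module they were copied from. The code itself keeps five 32-bit state words (H0 through H4), so it actually produces a 20-byte digest and 40 hex digits. Usage sketch, with the import path assumed:

    from calibre.ebooks.lit.mssha1 import new

    h = new(b'hello ')    # new(arg) is new() plus update(arg)
    h.update(b'world')
    digest = h.hexdigest()
    print(len(digest))    # 40 -- and the value differs from hashlib.sha1 by design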
Vandegrift ') import functools import io @@ -25,23 +25,23 @@ from polyglot.builtins import codepoint_to_chr, itervalues, string_or_bytes from polyglot.urllib import unquote as urlunquote from polyglot.urllib import urldefrag -__all__ = ["LitReader"] +__all__ = ['LitReader'] -XML_DECL = """ -""" -OPF_DECL = """ +XML_DECL = ''' +''' +OPF_DECL = ''' -""" -HTML_DECL = """ +''' +HTML_DECL = ''' -""" +''' -DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}" -LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}" +DESENCRYPT_GUID = '{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}' +LZXCOMPRESS_GUID = '{0A9007C6-4076-11D3-8789-0000F8105754}' CONTROL_TAG = 4 CONTROL_WINDOW_SIZE = 12 @@ -84,8 +84,8 @@ def encint(byts, remaining): def msguid(bytes): - values = struct.unpack(" 0: - data = (b"\000" * prepad) + data + data = (b'\000' * prepad) + data prepad = 0 postpad = 64 - (len(data) % 64) if postpad < 64: - data = data + (b"\000" * postpad) + data = data + (b'\000' * postpad) hash.update(data) digest = hash.digest() if not isinstance(digest, bytes): @@ -779,7 +778,7 @@ class LitFile: while len(transform) >= 16: csize = (int32(control) + 1) * 4 if csize > len(control) or csize <= 0: - raise LitError("ControlData is too short") + raise LitError('ControlData is too short') guid = msguid(transform) if guid == DESENCRYPT_GUID: content = self.decrypt(content) @@ -791,7 +790,7 @@ class LitFile: content = self.decompress(content, control, reset_table) control = control[csize:] else: - raise LitError("Unrecognized transform: %s." % repr(guid)) + raise LitError('Unrecognized transform: %s.' % repr(guid)) transform = transform[16:] return content @@ -799,18 +798,18 @@ class LitFile: length = len(content) extra = length & 0x7 if extra > 0: - self.warn("content length not a multiple of block size") - content += b"\0" * (8 - extra) + self.warn('content length not a multiple of block size') + content += b'\0' * (8 - extra) msdes.deskey(self.bookkey, msdes.DE1) return msdes.des(content) def decompress(self, content, control, reset_table): - if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != b"LZXC": - raise LitError("Invalid ControlData tag value") + if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != b'LZXC': + raise LitError('Invalid ControlData tag value') if len(reset_table) < (RESET_INTERVAL + 8): - raise LitError("Reset table is too short") + raise LitError('Reset table is too short') if u32(reset_table[RESET_UCLENGTH + 4:]) != 0: - raise LitError("Reset table has 64bit value for UCLENGTH") + raise LitError('Reset table has 64bit value for UCLENGTH') result = [] @@ -820,7 +819,7 @@ class LitFile: u >>= 1 window_size += 1 if window_size < 15 or window_size > 21: - raise LitError("Invalid window in ControlData") + raise LitError('Invalid window in ControlData') lzx.init(window_size) ofs_entry = int32(reset_table[RESET_HDRLEN:]) + 8 @@ -836,16 +835,16 @@ class LitFile: size = int32(reset_table[ofs_entry:]) u = int32(reset_table[ofs_entry + 4:]) if u != 0: - raise LitError("Reset table entry greater than 32 bits") + raise LitError('Reset table entry greater than 32 bits') if size >= len(content): - self._warn("LZX reset table entry out of bounds") + self._warn('LZX reset table entry out of bounds') if bytes_remaining >= window_bytes: lzx.reset() try: result.append( lzx.decompress(content[base:size], window_bytes)) except lzx.LZXError: - self.warn("LZX decompression error; skipping chunk") + self.warn('LZX decompression error; skipping chunk') bytes_remaining -= window_bytes base = size 
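One piece of the decrypt method above that deserves a comment: DES operates on 64-bit blocks, so content whose length is not a multiple of 8 is zero-padded before msdes.des() is called (with a warning, since a well-formed section should already be aligned). The alignment logic in isolation:

    def pad_to_des_block(content):
        extra = len(content) & 0x7      # length modulo 8
        if extra:
            content += b'\0' * (8 - extra)
        return content

    assert len(pad_to_des_block(b'x' * 13)) == 16
    assert pad_to_des_block(b'x' * 8) == b'x' * 8    # already aligned, untouched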
accum += int32(reset_table[RESET_INTERVAL:]) @@ -855,16 +854,16 @@ class LitFile: try: result.append(lzx.decompress(content[base:], bytes_remaining)) except lzx.LZXError: - self.warn("LZX decompression error; skipping chunk") + self.warn('LZX decompression error; skipping chunk') bytes_remaining = 0 if bytes_remaining > 0: - raise LitError("Failed to completely decompress section") + raise LitError('Failed to completely decompress section') return b''.join(result) def get_atoms(self, entry): name = '/'.join(('/data', entry.internal, 'atom')) if name not in self.entries: - return ({}, {}) + return {}, {} data = self.get_file(name) nentries, data = u32(data), data[4:] tags = {} @@ -876,9 +875,9 @@ class LitFile: break tags[i], data = data[:size], data[size:] if len(tags) != nentries: - self._warn("damaged or invalid atoms tag table") + self._warn('damaged or invalid atoms tag table') if len(data) < 4: - return (tags, {}) + return tags, {} attrs = {} nentries, data = u32(data), data[4:] for i in range(1, nentries + 1): @@ -889,12 +888,12 @@ class LitFile: break attrs[i], data = data[:size], data[size:] if len(attrs) != nentries: - self._warn("damaged or invalid atoms attributes table") - return (tags, attrs) + self._warn('damaged or invalid atoms attributes table') + return tags, attrs class LitContainer: - """Simple Container-interface, read-only accessor for LIT files.""" + '''Simple Container-interface, read-only accessor for LIT files.''' def __init__(self, filename_or_stream, log): self._litfile = LitFile(filename_or_stream, log) @@ -934,7 +933,7 @@ class LitContainer: except LitError: if b'PENGUIN group' not in raw: raise - print("WARNING: attempting PENGUIN malformed OPF fix") + print('WARNING: attempting PENGUIN malformed OPF fix') raw = raw.replace( b'PENGUIN group', b'\x00\x01\x18\x00PENGUIN group', 1) unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP) diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py index 044f88d340..29644d14b3 100644 --- a/src/calibre/ebooks/lit/writer.py +++ b/src/calibre/ebooks/lit/writer.py @@ -61,11 +61,11 @@ HTML_MAP = invert_tag_map(maps.HTML_MAP) LIT_MAGIC = b'ITOLITLS' -LITFILE_GUID = "{0A9007C1-4076-11D3-8789-0000F8105754}" -PIECE3_GUID = "{0A9007C3-4076-11D3-8789-0000F8105754}" -PIECE4_GUID = "{0A9007C4-4076-11D3-8789-0000F8105754}" -DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}" -LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}" +LITFILE_GUID = '{0A9007C1-4076-11D3-8789-0000F8105754}' +PIECE3_GUID = '{0A9007C3-4076-11D3-8789-0000F8105754}' +PIECE4_GUID = '{0A9007C4-4076-11D3-8789-0000F8105754}' +DESENCRYPT_GUID = '{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}' +LZXCOMPRESS_GUID = '{0A9007C6-4076-11D3-8789-0000F8105754}' def packguid(guid): @@ -73,7 +73,7 @@ def packguid(guid): guid[20:22], guid[22:24], guid[25:27], guid[27:29], \ guid[29:31], guid[31:33], guid[33:35], guid[35:37] values = [int(value, 16) for value in values] - return pack(" 6: - self.logger.warn("More than six anchors in file %r. " - "Some links may not work properly." % self.item.href) + self.logger.warn('More than six anchors in file %r. ' + 'Some links may not work properly.' 
diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py
index 044f88d340..29644d14b3 100644
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@@ -61,11 +61,11 @@ HTML_MAP = invert_tag_map(maps.HTML_MAP)

 LIT_MAGIC = b'ITOLITLS'

-LITFILE_GUID = "{0A9007C1-4076-11D3-8789-0000F8105754}"
-PIECE3_GUID = "{0A9007C3-4076-11D3-8789-0000F8105754}"
-PIECE4_GUID = "{0A9007C4-4076-11D3-8789-0000F8105754}"
-DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}"
-LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}"
+LITFILE_GUID = '{0A9007C1-4076-11D3-8789-0000F8105754}'
+PIECE3_GUID = '{0A9007C3-4076-11D3-8789-0000F8105754}'
+PIECE4_GUID = '{0A9007C4-4076-11D3-8789-0000F8105754}'
+DESENCRYPT_GUID = '{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}'
+LZXCOMPRESS_GUID = '{0A9007C6-4076-11D3-8789-0000F8105754}'


 def packguid(guid):
@@ -73,7 +73,7 @@ def packguid(guid):
         guid[20:22], guid[22:24], guid[25:27], guid[27:29], \
         guid[29:31], guid[31:33], guid[33:35], guid[35:37]
     values = [int(value, 16) for value in values]
-    return pack(" 6:
-            self.logger.warn("More than six anchors in file %r. "
-                "Some links may not work properly." % self.item.href)
+            self.logger.warn('More than six anchors in file %r. '
+                'Some links may not work properly.' % self.item.href)
         data = io.BytesIO()
         data.write(codepoint_to_chr(len(self.anchors)).encode('utf-8'))
         for anchor, offset in self.anchors:
@@ -469,8 +472,8 @@ class LitWriter:
         self._add_folder('/data')
         for item in self._oeb.manifest.values():
             if item.media_type not in LIT_MIMES:
-                self._logger.warn("File %r of unknown media-type %r "
-                    "excluded from output." % (item.href, item.media_type))
+                self._logger.warn('File %r of unknown media-type %r '
+                    'excluded from output.' % (item.href, item.media_type))
                 continue
             name = '/data/' + item.id
             data = item.data
@@ -579,7 +582,7 @@ class LitWriter:
             self._add_file('/DRMStorage/DRMSource', drmsource)
             tempkey = self._calculate_deskey([self._meta, drmsource])
             msdes.deskey(tempkey, msdes.EN0)
-            self._add_file('/DRMStorage/DRMSealed', msdes.des(b"\0" * 16))
+            self._add_file('/DRMStorage/DRMSealed', msdes.des(b'\0' * 16))
             self._bookkey = b'\0' * 8
             self._add_file('/DRMStorage/ValidationStream', b'MSReader', 3)

@@ -651,11 +654,11 @@ class LitWriter:
         hash = mssha1.new()
         for data in hashdata:
             if prepad > 0:
-                data = (b"\000" * prepad) + data
+                data = (b'\000' * prepad) + data
                 prepad = 0
             postpad = 64 - (len(data) % 64)
             if postpad < 64:
-                data = data + (b"\000" * postpad)
+                data = data + (b'\000' * postpad)
             hash.update(data)
         digest = hash.digest()
         if not isinstance(digest, bytes):
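
The _calculate_deskey() hunk above NUL-pads every input chunk to a 64-byte block boundary before hashing. A minimal sketch of that padding step, using hashlib.sha1 as a stand-in for calibre's mssha1 (a non-standard SHA-1 variant, so the digest value differs; only the padding logic is the point) and an illustrative prepad value:

import hashlib

def padded_digest(chunks, prepad=2):
    # prepad=2 is illustrative; the real writer chooses its own value.
    h = hashlib.sha1()  # stand-in for calibre.ebooks.mssha1
    for data in chunks:
        if prepad > 0:
            data = (b'\000' * prepad) + data  # NUL-prefix the first chunk only
            prepad = 0
        postpad = 64 - (len(data) % 64)
        if postpad < 64:
            data = data + (b'\000' * postpad)  # pad up to a 64-byte boundary
        h.update(data)
    return h.digest()

digest = padded_digest([b'meta bytes', b'drm source bytes'])
# Folding the 20-byte digest down to an 8-byte DES key by XOR; this
# reduction is an assumption shown only to complete the example, not
# necessarily the exact rule the LIT format uses.
key = bytearray(8)
for i, b in enumerate(digest):
    key[i % 8] ^= b
assert len(key) == 8
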
diff --git a/src/calibre/ebooks/lrf/__init__.py b/src/calibre/ebooks/lrf/__init__.py
index 4a324b3f40..865b6a31fc 100644
--- a/src/calibre/ebooks/lrf/__init__.py
+++ b/src/calibre/ebooks/lrf/__init__.py
@@ -1,16 +1,16 @@
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal '
-"""
+'''
 This package contains logic to read and write LRF files.
 The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfFormat}.
-"""
+'''

 from calibre.ebooks import ConversionError
 from calibre.ebooks.lrf.fonts import FONT_FILE_MAP
 from calibre.ebooks.lrf.pylrs.pylrs import BlockStyle, Header, TextBlock, TextStyle
 from calibre.ebooks.lrf.pylrs.pylrs import Book as _Book

-__docformat__ = "epytext"
+__docformat__ = 'epytext'


 class LRFParseError(Exception):
@@ -28,15 +28,15 @@ class PRS500_PROFILE:
     line_space = 1.2  # : Default (in pt)
     header_font_size = 6  #: In pt
     header_height = 30  # : In px
-    default_fonts = {'sans': "Swis721 BT Roman", 'mono': "Courier10 BT Roman",
-                     'serif': "Dutch801 Rm BT Roman"}
+    default_fonts = {'sans': 'Swis721 BT Roman', 'mono': 'Courier10 BT Roman',
+                     'serif': 'Dutch801 Rm BT Roman'}

     name = 'prs500'


 def find_custom_fonts(options, logger):
     from calibre.utils.fonts.scanner import font_scanner
-    fonts = {'serif' : None, 'sans' : None, 'mono' : None}
+    fonts = {'serif': None, 'sans': None, 'mono': None}

     def family(cmd):
         return cmd.split(',')[-1].strip()
@@ -106,7 +106,7 @@ def Book(options, logger, font_delta=0, header=None,

     for family in ['serif', 'sans', 'mono']:
         if not fonts[family]:
-            fonts[family] = {'normal' : (None, profile.default_fonts[family])}
+            fonts[family] = {'normal': (None, profile.default_fonts[family])}
         elif 'normal' not in fonts[family]:
             raise ConversionError('Could not find the normal version of the ' + family + ' font')
     return book, fonts
diff --git a/src/calibre/ebooks/lrf/fonts.py b/src/calibre/ebooks/lrf/fonts.py
index eafc65b95f..9cdda9b282 100644
--- a/src/calibre/ebooks/lrf/fonts.py
+++ b/src/calibre/ebooks/lrf/fonts.py
@@ -9,9 +9,9 @@ Default fonts used in the PRS500

 LIBERATION_FONT_MAP = {
-    'Swis721 BT Roman' : 'LiberationSans-Regular',
-    'Dutch801 Rm BT Roman' : 'LiberationSerif-Regular',
-    'Courier10 BT Roman' : 'LiberationMono-Regular',
+    'Swis721 BT Roman'    : 'LiberationSans-Regular',
+    'Dutch801 Rm BT Roman': 'LiberationSerif-Regular',
+    'Courier10 BT Roman'  : 'LiberationMono-Regular',
 }

 FONT_FILE_MAP = {}
diff --git a/src/calibre/ebooks/lrf/html/__init__.py b/src/calibre/ebooks/lrf/html/__init__.py
index 9e110971bf..24cb7ee249 100644
--- a/src/calibre/ebooks/lrf/html/__init__.py
+++ b/src/calibre/ebooks/lrf/html/__init__.py
@@ -1,8 +1,8 @@
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal '
-"""
+'''
 This package contains code to convert HTML ebooks to LRF ebooks.
-"""
+'''

-__docformat__ = "epytext"
-__author__ = "Kovid Goyal "
+__docformat__ = 'epytext'
+__author__ = 'Kovid Goyal '
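
The fonts.py hunk above only re-aligns LIBERATION_FONT_MAP, which maps the PRS500's built-in Bitstream face names to the bundled Liberation equivalents. For context, this is how such a map is typically consumed; the substitute_face() wrapper and its fallback policy are illustrative, not calibre's API:

LIBERATION_FONT_MAP = {
    'Swis721 BT Roman'    : 'LiberationSans-Regular',
    'Dutch801 Rm BT Roman': 'LiberationSerif-Regular',
    'Courier10 BT Roman'  : 'LiberationMono-Regular',
}

def substitute_face(name, fallback='LiberationSerif-Regular'):
    # Hypothetical helper: unknown faces fall back to a serif default.
    return LIBERATION_FONT_MAP.get(name, fallback)

assert substitute_face('Swis721 BT Roman') == 'LiberationSans-Regular'
assert substitute_face('Comic Sans MS') == 'LiberationSerif-Regular'
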
diff --git a/src/calibre/ebooks/lrf/html/color_map.py b/src/calibre/ebooks/lrf/html/color_map.py
index 0260fd4ab4..1fc4464256 100644
--- a/src/calibre/ebooks/lrf/html/color_map.py
+++ b/src/calibre/ebooks/lrf/html/color_map.py
@@ -4,97 +4,97 @@ __copyright__ = '2008, Kovid Goyal '

 import re

 NAME_MAP = {
-        'aliceblue': '#F0F8FF',
-        'antiquewhite': '#FAEBD7',
-        'aqua': '#00FFFF',
-        'aquamarine': '#7FFFD4',
-        'azure': '#F0FFFF',
-        'beige': '#F5F5DC',
-        'bisque': '#FFE4C4',
-        'black': '#000000',
-        'blanchedalmond': '#FFEBCD',
-        'blue': '#0000FF',
-        'brown': '#A52A2A',
-        'burlywood': '#DEB887',
-        'cadetblue': '#5F9EA0',
-        'chartreuse': '#7FFF00',
-        'chocolate': '#D2691E',
-        'coral': '#FF7F50',
-        'crimson': '#DC143C',
-        'cyan': '#00FFFF',
-        'darkblue': '#00008B',
-        'darkgoldenrod': '#B8860B',
-        'darkgreen': '#006400',
-        'darkkhaki': '#BDB76B',
-        'darkmagenta': '#8B008B',
-        'darkolivegreen': '#556B2F',
-        'darkorange': '#FF8C00',
-        'darkorchid': '#9932CC',
-        'darkred': '#8B0000',
-        'darksalmon': '#E9967A',
-        'darkslateblue': '#483D8B',
-        'darkslategrey': '#2F4F4F',
-        'darkviolet': '#9400D3',
-        'deeppink': '#FF1493',
-        'dodgerblue': '#1E90FF',
-        'firebrick': '#B22222',
-        'floralwhite': '#FFFAF0',
-        'forestgreen': '#228B22',
-        'fuchsia': '#FF00FF',
-        'gainsboro': '#DCDCDC',
-        'ghostwhite': '#F8F8FF',
-        'gold': '#FFD700',
-        'goldenrod': '#DAA520',
-        'indianred ': '#CD5C5C',
-        'indigo ': '#4B0082',
-        'khaki': '#F0E68C',
-        'lavenderblush': '#FFF0F5',
-        'lawngreen': '#7CFC00',
-        'lightblue': '#ADD8E6',
-        'lightcoral': '#F08080',
-        'lightgoldenrodyellow': '#FAFAD2',
-        'lightgray': '#D3D3D3',
-        'lightgrey': '#D3D3D3',
-        'lightskyblue': '#87CEFA',
-        'lightslategrey': '#778899',
-        'lightsteelblue': '#B0C4DE',
-        'lime': '#87CEFA',
-        'linen': '#FAF0E6',
-        'magenta': '#FF00FF',
-        'maroon': '#800000',
-        'mediumaquamarine': '#66CDAA',
-        'mediumblue': '#0000CD',
-        'mediumorchid': '#BA55D3',
-        'mediumpurple': '#9370D8',
-        'mediumseagreen': '#3CB371',
-        'mediumslateblue': '#7B68EE',
-        'midnightblue': '#191970',
-        'moccasin': '#FFE4B5',
-        'navajowhite': '#FFDEAD',
-        'navy': '#000080',
-        'oldlace': '#FDF5E6',
-        'olive': '#808000',
-        'orange': '#FFA500',
-        'orangered': '#FF4500',
-        'orchid': '#DA70D6',
-        'paleturquoise': '#AFEEEE',
-        'papayawhip': '#FFEFD5',
-        'peachpuff': '#FFDAB9',
-        'powderblue': '#B0E0E6',
-        'rosybrown': '#BC8F8F',
-        'royalblue': '#4169E1',
-        'saddlebrown': '#8B4513',
-        'sandybrown': '#8B4513',
-        'seashell': '#FFF5EE',
-        'sienna': '#A0522D',
-        'silver': '#C0C0C0',
-        'skyblue': '#87CEEB',
-        'slategrey': '#708090',
-        'snow': '#FFFAFA',
-        'springgreen': '#00FF7F',
-        'violet': '#EE82EE',
-        'yellowgreen': '#9ACD32'
-    }
+    'aliceblue': '#F0F8FF',
+    'antiquewhite': '#FAEBD7',
+    'aqua': '#00FFFF',
+    'aquamarine': '#7FFFD4',
+    'azure': '#F0FFFF',
+    'beige': '#F5F5DC',
+    'bisque': '#FFE4C4',
+    'black': '#000000',
+    'blanchedalmond': '#FFEBCD',
+    'blue': '#0000FF',
+    'brown': '#A52A2A',
+    'burlywood': '#DEB887',
+    'cadetblue': '#5F9EA0',
+    'chartreuse': '#7FFF00',
+    'chocolate': '#D2691E',
+    'coral': '#FF7F50',
+    'crimson': '#DC143C',
+    'cyan': '#00FFFF',
+    'darkblue': '#00008B',
+    'darkgoldenrod': '#B8860B',
+    'darkgreen': '#006400',
+    'darkkhaki': '#BDB76B',
+    'darkmagenta': '#8B008B',
+    'darkolivegreen': '#556B2F',
+    'darkorange': '#FF8C00',
+    'darkorchid': '#9932CC',
+    'darkred': '#8B0000',
+    'darksalmon': '#E9967A',
+    'darkslateblue': '#483D8B',
+    'darkslategrey': '#2F4F4F',
+    'darkviolet': '#9400D3',
+    'deeppink': '#FF1493',
+    'dodgerblue': '#1E90FF',
+    'firebrick': '#B22222',
+    'floralwhite': '#FFFAF0',
+    'forestgreen': '#228B22',
+    'fuchsia': '#FF00FF',
+    'gainsboro': '#DCDCDC',
+    'ghostwhite': '#F8F8FF',
+    'gold': '#FFD700',
+    'goldenrod': '#DAA520',
+    'indianred ': '#CD5C5C',
+    'indigo ': '#4B0082',
+    'khaki': '#F0E68C',
+    'lavenderblush': '#FFF0F5',
+    'lawngreen': '#7CFC00',
+    'lightblue': '#ADD8E6',
+    'lightcoral': '#F08080',
+    'lightgoldenrodyellow': '#FAFAD2',
+    'lightgray': '#D3D3D3',
+    'lightgrey': '#D3D3D3',
+    'lightskyblue': '#87CEFA',
+    'lightslategrey': '#778899',
+    'lightsteelblue': '#B0C4DE',
+    'lime': '#87CEFA',
+    'linen': '#FAF0E6',
+    'magenta': '#FF00FF',
+    'maroon': '#800000',
+    'mediumaquamarine': '#66CDAA',
+    'mediumblue': '#0000CD',
+    'mediumorchid': '#BA55D3',
+    'mediumpurple': '#9370D8',
+    'mediumseagreen': '#3CB371',
+    'mediumslateblue': '#7B68EE',
+    'midnightblue': '#191970',
+    'moccasin': '#FFE4B5',
+    'navajowhite': '#FFDEAD',
+    'navy': '#000080',
+    'oldlace': '#FDF5E6',
+    'olive': '#808000',
+    'orange': '#FFA500',
+    'orangered': '#FF4500',
+    'orchid': '#DA70D6',
+    'paleturquoise': '#AFEEEE',
+    'papayawhip': '#FFEFD5',
+    'peachpuff': '#FFDAB9',
+    'powderblue': '#B0E0E6',
+    'rosybrown': '#BC8F8F',
+    'royalblue': '#4169E1',
+    'saddlebrown': '#8B4513',
+    'sandybrown': '#8B4513',
+    'seashell': '#FFF5EE',
+    'sienna': '#A0522D',
+    'silver': '#C0C0C0',
+    'skyblue': '#87CEEB',
+    'slategrey': '#708090',
+    'snow': '#FFFAFA',
+    'springgreen': '#00FF7F',
+    'violet': '#EE82EE',
+    'yellowgreen': '#9ACD32'
+}

 hex_pat = re.compile(r'#(\d{2})(\d{2})(\d{2})')
 rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)
diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py
index c4169d9a32..a1e7b8731c 100644
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -46,12 +46,12 @@ from calibre.ptempfile import PersistentTemporaryFile
 from polyglot.builtins import itervalues, string_or_bytes
 from polyglot.urllib import unquote, urlparse

-"""
+'''
 Code to convert HTML ebooks into LRF ebooks.

 I am indebted to esperanc for the initial CSS->Xylog Style conversion
 code and to Falstaff for pylrs.
-"""
+'''

 from PIL import Image as PILImage

@@ -98,7 +98,7 @@ def tag_regex(tagname):


 class HTMLConverter:
-    SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
+    SELECTOR_PAT = re.compile(r'([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}')
     PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
     IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
@@ -108,7 +108,7 @@ class HTMLConverter:
          lambda match: ''),  # Strip comments from